161145dc2SMartin Matuska // SPDX-License-Identifier: CDDL-1.0
2eac7052fSMatt Macy /*
3eac7052fSMatt Macy * CDDL HEADER START
4eac7052fSMatt Macy *
5eac7052fSMatt Macy * The contents of this file are subject to the terms of the
6eac7052fSMatt Macy * Common Development and Distribution License (the "License").
7eac7052fSMatt Macy * You may not use this file except in compliance with the License.
8eac7052fSMatt Macy *
9eac7052fSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0.
11eac7052fSMatt Macy * See the License for the specific language governing permissions
12eac7052fSMatt Macy * and limitations under the License.
13eac7052fSMatt Macy *
14eac7052fSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each
15eac7052fSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16eac7052fSMatt Macy * If applicable, add the following below this CDDL HEADER, with the
17eac7052fSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying
18eac7052fSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner]
19eac7052fSMatt Macy *
20eac7052fSMatt Macy * CDDL HEADER END
21eac7052fSMatt Macy */
22eac7052fSMatt Macy
23eac7052fSMatt Macy #include <sys/zfs_context.h>
24eac7052fSMatt Macy #include <sys/spa_impl.h>
25eac7052fSMatt Macy #include <sys/vdev_impl.h>
26eac7052fSMatt Macy #include <sys/spa.h>
27eac7052fSMatt Macy #include <zfs_comutil.h>
28eac7052fSMatt Macy
29eac7052fSMatt Macy /*
30eac7052fSMatt Macy * Keeps stats on last N reads per spa_t, disabled by default.
31eac7052fSMatt Macy */
32be181ee2SMartin Matuska static uint_t zfs_read_history = B_FALSE;
33eac7052fSMatt Macy
34eac7052fSMatt Macy /*
35eac7052fSMatt Macy * Include cache hits in history, disabled by default.
36eac7052fSMatt Macy */
37e92ffd9bSMartin Matuska static int zfs_read_history_hits = B_FALSE;
38eac7052fSMatt Macy
39eac7052fSMatt Macy /*
40eac7052fSMatt Macy * Keeps stats on the last 100 txgs by default.
41eac7052fSMatt Macy */
42be181ee2SMartin Matuska static uint_t zfs_txg_history = 100;
43eac7052fSMatt Macy
44eac7052fSMatt Macy /*
45eac7052fSMatt Macy * Keeps stats on the last N MMP updates, disabled by default.
46eac7052fSMatt Macy */
47be181ee2SMartin Matuska static uint_t zfs_multihost_history = B_FALSE;
48eac7052fSMatt Macy
49eac7052fSMatt Macy /*
50eac7052fSMatt Macy * ==========================================================================
51eac7052fSMatt Macy * SPA Read History Routines
52eac7052fSMatt Macy * ==========================================================================
53eac7052fSMatt Macy */
54eac7052fSMatt Macy
55eac7052fSMatt Macy /*
56eac7052fSMatt Macy * Read statistics - Information exported regarding each arc_read call
57eac7052fSMatt Macy */
58eac7052fSMatt Macy typedef struct spa_read_history {
59eac7052fSMatt Macy hrtime_t start; /* time read completed */
60eac7052fSMatt Macy uint64_t objset; /* read from this objset */
61eac7052fSMatt Macy uint64_t object; /* read of this object number */
62eac7052fSMatt Macy uint64_t level; /* block's indirection level */
63eac7052fSMatt Macy uint64_t blkid; /* read of this block id */
64eac7052fSMatt Macy char origin[24]; /* read originated from here */
65eac7052fSMatt Macy uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */
66eac7052fSMatt Macy pid_t pid; /* PID of task doing read */
67eac7052fSMatt Macy char comm[16]; /* process name of task doing read */
68eac7052fSMatt Macy procfs_list_node_t srh_node;
69eac7052fSMatt Macy } spa_read_history_t;
70eac7052fSMatt Macy
/*
 * Print the column titles for the read history table.  Always returns 0.
 */
static int
spa_read_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
	    "%-24s %-8s %-16s\n",
	    "UID", "start", "objset", "object", "level",
	    "blkid", "aflags", "origin", "pid", "process");

	return (0);
}
80eac7052fSMatt Macy
81eac7052fSMatt Macy static int
spa_read_history_show(struct seq_file * f,void * data)82eac7052fSMatt Macy spa_read_history_show(struct seq_file *f, void *data)
83eac7052fSMatt Macy {
84eac7052fSMatt Macy spa_read_history_t *srh = (spa_read_history_t *)data;
85eac7052fSMatt Macy
86eac7052fSMatt Macy seq_printf(f, "%-8llu %-16llu 0x%-6llx "
87eac7052fSMatt Macy "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
88eac7052fSMatt Macy (u_longlong_t)srh->srh_node.pln_id, srh->start,
89eac7052fSMatt Macy (longlong_t)srh->objset, (longlong_t)srh->object,
90eac7052fSMatt Macy (longlong_t)srh->level, (longlong_t)srh->blkid,
91eac7052fSMatt Macy srh->aflags, srh->origin, srh->pid, srh->comm);
92eac7052fSMatt Macy
93eac7052fSMatt Macy return (0);
94eac7052fSMatt Macy }
95eac7052fSMatt Macy
96eac7052fSMatt Macy /* Remove oldest elements from list until there are no more than 'size' left */
97eac7052fSMatt Macy static void
spa_read_history_truncate(spa_history_list_t * shl,unsigned int size)98eac7052fSMatt Macy spa_read_history_truncate(spa_history_list_t *shl, unsigned int size)
99eac7052fSMatt Macy {
100eac7052fSMatt Macy spa_read_history_t *srh;
101eac7052fSMatt Macy while (shl->size > size) {
102eac7052fSMatt Macy srh = list_remove_head(&shl->procfs_list.pl_list);
103eac7052fSMatt Macy ASSERT3P(srh, !=, NULL);
104eac7052fSMatt Macy kmem_free(srh, sizeof (spa_read_history_t));
105eac7052fSMatt Macy shl->size--;
106eac7052fSMatt Macy }
107eac7052fSMatt Macy
108eac7052fSMatt Macy if (size == 0)
109eac7052fSMatt Macy ASSERT(list_is_empty(&shl->procfs_list.pl_list));
110eac7052fSMatt Macy }
111eac7052fSMatt Macy
112eac7052fSMatt Macy static int
spa_read_history_clear(procfs_list_t * procfs_list)113eac7052fSMatt Macy spa_read_history_clear(procfs_list_t *procfs_list)
114eac7052fSMatt Macy {
115eac7052fSMatt Macy spa_history_list_t *shl = procfs_list->pl_private;
116eac7052fSMatt Macy mutex_enter(&procfs_list->pl_lock);
117eac7052fSMatt Macy spa_read_history_truncate(shl, 0);
118eac7052fSMatt Macy mutex_exit(&procfs_list->pl_lock);
119eac7052fSMatt Macy return (0);
120eac7052fSMatt Macy }
121eac7052fSMatt Macy
122eac7052fSMatt Macy static void
spa_read_history_init(spa_t * spa)123eac7052fSMatt Macy spa_read_history_init(spa_t *spa)
124eac7052fSMatt Macy {
125eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.read_history;
126eac7052fSMatt Macy
127eac7052fSMatt Macy shl->size = 0;
128eac7052fSMatt Macy shl->procfs_list.pl_private = shl;
129c40487d4SMatt Macy procfs_list_install("zfs",
130c40487d4SMatt Macy spa_name(spa),
131eac7052fSMatt Macy "reads",
132eac7052fSMatt Macy 0600,
133eac7052fSMatt Macy &shl->procfs_list,
134eac7052fSMatt Macy spa_read_history_show,
135eac7052fSMatt Macy spa_read_history_show_header,
136eac7052fSMatt Macy spa_read_history_clear,
137eac7052fSMatt Macy offsetof(spa_read_history_t, srh_node));
138eac7052fSMatt Macy }
139eac7052fSMatt Macy
140eac7052fSMatt Macy static void
spa_read_history_destroy(spa_t * spa)141eac7052fSMatt Macy spa_read_history_destroy(spa_t *spa)
142eac7052fSMatt Macy {
143eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.read_history;
144eac7052fSMatt Macy procfs_list_uninstall(&shl->procfs_list);
145eac7052fSMatt Macy spa_read_history_truncate(shl, 0);
146eac7052fSMatt Macy procfs_list_destroy(&shl->procfs_list);
147eac7052fSMatt Macy }
148eac7052fSMatt Macy
149eac7052fSMatt Macy void
spa_read_history_add(spa_t * spa,const zbookmark_phys_t * zb,uint32_t aflags)150eac7052fSMatt Macy spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
151eac7052fSMatt Macy {
152eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.read_history;
153eac7052fSMatt Macy spa_read_history_t *srh;
154eac7052fSMatt Macy
155eac7052fSMatt Macy ASSERT3P(spa, !=, NULL);
156eac7052fSMatt Macy ASSERT3P(zb, !=, NULL);
157eac7052fSMatt Macy
158eac7052fSMatt Macy if (zfs_read_history == 0 && shl->size == 0)
159eac7052fSMatt Macy return;
160eac7052fSMatt Macy
161eac7052fSMatt Macy if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
162eac7052fSMatt Macy return;
163eac7052fSMatt Macy
164eac7052fSMatt Macy srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
165eac7052fSMatt Macy strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
166eac7052fSMatt Macy srh->start = gethrtime();
167eac7052fSMatt Macy srh->objset = zb->zb_objset;
168eac7052fSMatt Macy srh->object = zb->zb_object;
169eac7052fSMatt Macy srh->level = zb->zb_level;
170eac7052fSMatt Macy srh->blkid = zb->zb_blkid;
171eac7052fSMatt Macy srh->aflags = aflags;
172eac7052fSMatt Macy srh->pid = getpid();
173eac7052fSMatt Macy
174eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
175eac7052fSMatt Macy
176eac7052fSMatt Macy procfs_list_add(&shl->procfs_list, srh);
177eac7052fSMatt Macy shl->size++;
178eac7052fSMatt Macy
179eac7052fSMatt Macy spa_read_history_truncate(shl, zfs_read_history);
180eac7052fSMatt Macy
181eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
182eac7052fSMatt Macy }
183eac7052fSMatt Macy
184eac7052fSMatt Macy /*
185eac7052fSMatt Macy * ==========================================================================
186eac7052fSMatt Macy * SPA TXG History Routines
187eac7052fSMatt Macy * ==========================================================================
188eac7052fSMatt Macy */
189eac7052fSMatt Macy
190eac7052fSMatt Macy /*
191eac7052fSMatt Macy * Txg statistics - Information exported regarding each txg sync
192eac7052fSMatt Macy */
193eac7052fSMatt Macy
194eac7052fSMatt Macy typedef struct spa_txg_history {
195eac7052fSMatt Macy uint64_t txg; /* txg id */
196eac7052fSMatt Macy txg_state_t state; /* active txg state */
197eac7052fSMatt Macy uint64_t nread; /* number of bytes read */
198eac7052fSMatt Macy uint64_t nwritten; /* number of bytes written */
199eac7052fSMatt Macy uint64_t reads; /* number of read operations */
200eac7052fSMatt Macy uint64_t writes; /* number of write operations */
201eac7052fSMatt Macy uint64_t ndirty; /* number of dirty bytes */
202eac7052fSMatt Macy hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */
203eac7052fSMatt Macy procfs_list_node_t sth_node;
204eac7052fSMatt Macy } spa_txg_history_t;
205eac7052fSMatt Macy
/*
 * Print the column titles for the txg history table.  Always returns 0.
 */
static int
spa_txg_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s "
	    "%-8s %-8s %-12s %-12s %-12s %-12s\n",
	    "txg", "birth", "state", "ndirty", "nread", "nwritten",
	    "reads", "writes", "otime", "qtime", "wtime", "stime");

	return (0);
}
215eac7052fSMatt Macy
216eac7052fSMatt Macy static int
spa_txg_history_show(struct seq_file * f,void * data)217eac7052fSMatt Macy spa_txg_history_show(struct seq_file *f, void *data)
218eac7052fSMatt Macy {
219eac7052fSMatt Macy spa_txg_history_t *sth = (spa_txg_history_t *)data;
220eac7052fSMatt Macy uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
221eac7052fSMatt Macy char state;
222eac7052fSMatt Macy
223eac7052fSMatt Macy switch (sth->state) {
224eac7052fSMatt Macy case TXG_STATE_BIRTH: state = 'B'; break;
225eac7052fSMatt Macy case TXG_STATE_OPEN: state = 'O'; break;
226eac7052fSMatt Macy case TXG_STATE_QUIESCED: state = 'Q'; break;
227eac7052fSMatt Macy case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break;
228eac7052fSMatt Macy case TXG_STATE_SYNCED: state = 'S'; break;
229eac7052fSMatt Macy case TXG_STATE_COMMITTED: state = 'C'; break;
230eac7052fSMatt Macy default: state = '?'; break;
231eac7052fSMatt Macy }
232eac7052fSMatt Macy
233eac7052fSMatt Macy if (sth->times[TXG_STATE_OPEN])
234eac7052fSMatt Macy open = sth->times[TXG_STATE_OPEN] -
235eac7052fSMatt Macy sth->times[TXG_STATE_BIRTH];
236eac7052fSMatt Macy
237eac7052fSMatt Macy if (sth->times[TXG_STATE_QUIESCED])
238eac7052fSMatt Macy quiesce = sth->times[TXG_STATE_QUIESCED] -
239eac7052fSMatt Macy sth->times[TXG_STATE_OPEN];
240eac7052fSMatt Macy
241eac7052fSMatt Macy if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
242eac7052fSMatt Macy wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
243eac7052fSMatt Macy sth->times[TXG_STATE_QUIESCED];
244eac7052fSMatt Macy
245eac7052fSMatt Macy if (sth->times[TXG_STATE_SYNCED])
246eac7052fSMatt Macy sync = sth->times[TXG_STATE_SYNCED] -
247eac7052fSMatt Macy sth->times[TXG_STATE_WAIT_FOR_SYNC];
248eac7052fSMatt Macy
249eac7052fSMatt Macy seq_printf(f, "%-8llu %-16llu %-5c %-12llu "
250eac7052fSMatt Macy "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
251eac7052fSMatt Macy (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
252eac7052fSMatt Macy (u_longlong_t)sth->ndirty,
253eac7052fSMatt Macy (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
254eac7052fSMatt Macy (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
255eac7052fSMatt Macy (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
256eac7052fSMatt Macy (u_longlong_t)sync);
257eac7052fSMatt Macy
258eac7052fSMatt Macy return (0);
259eac7052fSMatt Macy }
260eac7052fSMatt Macy
261eac7052fSMatt Macy /* Remove oldest elements from list until there are no more than 'size' left */
262eac7052fSMatt Macy static void
spa_txg_history_truncate(spa_history_list_t * shl,unsigned int size)263eac7052fSMatt Macy spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size)
264eac7052fSMatt Macy {
265eac7052fSMatt Macy spa_txg_history_t *sth;
266eac7052fSMatt Macy while (shl->size > size) {
267eac7052fSMatt Macy sth = list_remove_head(&shl->procfs_list.pl_list);
268eac7052fSMatt Macy ASSERT3P(sth, !=, NULL);
269eac7052fSMatt Macy kmem_free(sth, sizeof (spa_txg_history_t));
270eac7052fSMatt Macy shl->size--;
271eac7052fSMatt Macy }
272eac7052fSMatt Macy
273eac7052fSMatt Macy if (size == 0)
274eac7052fSMatt Macy ASSERT(list_is_empty(&shl->procfs_list.pl_list));
275eac7052fSMatt Macy
276eac7052fSMatt Macy }
277eac7052fSMatt Macy
278eac7052fSMatt Macy static int
spa_txg_history_clear(procfs_list_t * procfs_list)279eac7052fSMatt Macy spa_txg_history_clear(procfs_list_t *procfs_list)
280eac7052fSMatt Macy {
281eac7052fSMatt Macy spa_history_list_t *shl = procfs_list->pl_private;
282eac7052fSMatt Macy mutex_enter(&procfs_list->pl_lock);
283eac7052fSMatt Macy spa_txg_history_truncate(shl, 0);
284eac7052fSMatt Macy mutex_exit(&procfs_list->pl_lock);
285eac7052fSMatt Macy return (0);
286eac7052fSMatt Macy }
287eac7052fSMatt Macy
288eac7052fSMatt Macy static void
spa_txg_history_init(spa_t * spa)289eac7052fSMatt Macy spa_txg_history_init(spa_t *spa)
290eac7052fSMatt Macy {
291eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
292eac7052fSMatt Macy
293eac7052fSMatt Macy shl->size = 0;
294eac7052fSMatt Macy shl->procfs_list.pl_private = shl;
295c40487d4SMatt Macy procfs_list_install("zfs",
296c40487d4SMatt Macy spa_name(spa),
297eac7052fSMatt Macy "txgs",
298eac7052fSMatt Macy 0644,
299eac7052fSMatt Macy &shl->procfs_list,
300eac7052fSMatt Macy spa_txg_history_show,
301eac7052fSMatt Macy spa_txg_history_show_header,
302eac7052fSMatt Macy spa_txg_history_clear,
303eac7052fSMatt Macy offsetof(spa_txg_history_t, sth_node));
304eac7052fSMatt Macy }
305eac7052fSMatt Macy
306eac7052fSMatt Macy static void
spa_txg_history_destroy(spa_t * spa)307eac7052fSMatt Macy spa_txg_history_destroy(spa_t *spa)
308eac7052fSMatt Macy {
309eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
310eac7052fSMatt Macy procfs_list_uninstall(&shl->procfs_list);
311eac7052fSMatt Macy spa_txg_history_truncate(shl, 0);
312eac7052fSMatt Macy procfs_list_destroy(&shl->procfs_list);
313eac7052fSMatt Macy }
314eac7052fSMatt Macy
315eac7052fSMatt Macy /*
316eac7052fSMatt Macy * Add a new txg to historical record.
317eac7052fSMatt Macy */
318eac7052fSMatt Macy void
spa_txg_history_add(spa_t * spa,uint64_t txg,hrtime_t birth_time)319eac7052fSMatt Macy spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
320eac7052fSMatt Macy {
321eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
322eac7052fSMatt Macy spa_txg_history_t *sth;
323eac7052fSMatt Macy
324eac7052fSMatt Macy if (zfs_txg_history == 0 && shl->size == 0)
325eac7052fSMatt Macy return;
326eac7052fSMatt Macy
327eac7052fSMatt Macy sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
328eac7052fSMatt Macy sth->txg = txg;
329eac7052fSMatt Macy sth->state = TXG_STATE_OPEN;
330eac7052fSMatt Macy sth->times[TXG_STATE_BIRTH] = birth_time;
331eac7052fSMatt Macy
332eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
333eac7052fSMatt Macy procfs_list_add(&shl->procfs_list, sth);
334eac7052fSMatt Macy shl->size++;
335eac7052fSMatt Macy spa_txg_history_truncate(shl, zfs_txg_history);
336eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
337eac7052fSMatt Macy }
338eac7052fSMatt Macy
339eac7052fSMatt Macy /*
340eac7052fSMatt Macy * Set txg state completion time and increment current state.
341eac7052fSMatt Macy */
342eac7052fSMatt Macy int
spa_txg_history_set(spa_t * spa,uint64_t txg,txg_state_t completed_state,hrtime_t completed_time)343eac7052fSMatt Macy spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
344eac7052fSMatt Macy hrtime_t completed_time)
345eac7052fSMatt Macy {
346eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
347eac7052fSMatt Macy spa_txg_history_t *sth;
348eac7052fSMatt Macy int error = ENOENT;
349eac7052fSMatt Macy
350eac7052fSMatt Macy if (zfs_txg_history == 0)
351eac7052fSMatt Macy return (0);
352eac7052fSMatt Macy
353eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
354eac7052fSMatt Macy for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
355eac7052fSMatt Macy sth = list_prev(&shl->procfs_list.pl_list, sth)) {
356eac7052fSMatt Macy if (sth->txg == txg) {
357eac7052fSMatt Macy sth->times[completed_state] = completed_time;
358eac7052fSMatt Macy sth->state++;
359eac7052fSMatt Macy error = 0;
360eac7052fSMatt Macy break;
361eac7052fSMatt Macy }
362eac7052fSMatt Macy }
363eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
364eac7052fSMatt Macy
365eac7052fSMatt Macy return (error);
366eac7052fSMatt Macy }
367eac7052fSMatt Macy
368eac7052fSMatt Macy /*
369eac7052fSMatt Macy * Set txg IO stats.
370eac7052fSMatt Macy */
371eac7052fSMatt Macy static int
spa_txg_history_set_io(spa_t * spa,uint64_t txg,uint64_t nread,uint64_t nwritten,uint64_t reads,uint64_t writes,uint64_t ndirty)372eac7052fSMatt Macy spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
373eac7052fSMatt Macy uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
374eac7052fSMatt Macy {
375eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
376eac7052fSMatt Macy spa_txg_history_t *sth;
377eac7052fSMatt Macy int error = ENOENT;
378eac7052fSMatt Macy
379eac7052fSMatt Macy if (zfs_txg_history == 0)
380eac7052fSMatt Macy return (0);
381eac7052fSMatt Macy
382eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
383eac7052fSMatt Macy for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
384eac7052fSMatt Macy sth = list_prev(&shl->procfs_list.pl_list, sth)) {
385eac7052fSMatt Macy if (sth->txg == txg) {
386eac7052fSMatt Macy sth->nread = nread;
387eac7052fSMatt Macy sth->nwritten = nwritten;
388eac7052fSMatt Macy sth->reads = reads;
389eac7052fSMatt Macy sth->writes = writes;
390eac7052fSMatt Macy sth->ndirty = ndirty;
391eac7052fSMatt Macy error = 0;
392eac7052fSMatt Macy break;
393eac7052fSMatt Macy }
394eac7052fSMatt Macy }
395eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
396eac7052fSMatt Macy
397eac7052fSMatt Macy return (error);
398eac7052fSMatt Macy }
399eac7052fSMatt Macy
400eac7052fSMatt Macy txg_stat_t *
spa_txg_history_init_io(spa_t * spa,uint64_t txg,dsl_pool_t * dp)401eac7052fSMatt Macy spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
402eac7052fSMatt Macy {
403eac7052fSMatt Macy txg_stat_t *ts;
404eac7052fSMatt Macy
405eac7052fSMatt Macy if (zfs_txg_history == 0)
406eac7052fSMatt Macy return (NULL);
407eac7052fSMatt Macy
408eac7052fSMatt Macy ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP);
409eac7052fSMatt Macy
410eac7052fSMatt Macy spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
411eac7052fSMatt Macy vdev_get_stats(spa->spa_root_vdev, &ts->vs1);
412eac7052fSMatt Macy spa_config_exit(spa, SCL_CONFIG, FTAG);
413eac7052fSMatt Macy
414eac7052fSMatt Macy ts->txg = txg;
415eac7052fSMatt Macy ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
416eac7052fSMatt Macy
417eac7052fSMatt Macy spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime());
418eac7052fSMatt Macy
419eac7052fSMatt Macy return (ts);
420eac7052fSMatt Macy }
421eac7052fSMatt Macy
422eac7052fSMatt Macy void
spa_txg_history_fini_io(spa_t * spa,txg_stat_t * ts)423eac7052fSMatt Macy spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
424eac7052fSMatt Macy {
425eac7052fSMatt Macy if (ts == NULL)
426eac7052fSMatt Macy return;
427eac7052fSMatt Macy
428eac7052fSMatt Macy if (zfs_txg_history == 0) {
429eac7052fSMatt Macy kmem_free(ts, sizeof (txg_stat_t));
430eac7052fSMatt Macy return;
431eac7052fSMatt Macy }
432eac7052fSMatt Macy
433eac7052fSMatt Macy spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
434eac7052fSMatt Macy vdev_get_stats(spa->spa_root_vdev, &ts->vs2);
435eac7052fSMatt Macy spa_config_exit(spa, SCL_CONFIG, FTAG);
436eac7052fSMatt Macy
437eac7052fSMatt Macy spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime());
438eac7052fSMatt Macy spa_txg_history_set_io(spa, ts->txg,
439eac7052fSMatt Macy ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ],
440eac7052fSMatt Macy ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE],
441eac7052fSMatt Macy ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ],
442eac7052fSMatt Macy ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE],
443eac7052fSMatt Macy ts->ndirty);
444eac7052fSMatt Macy
445eac7052fSMatt Macy kmem_free(ts, sizeof (txg_stat_t));
446eac7052fSMatt Macy }
447eac7052fSMatt Macy
448eac7052fSMatt Macy /*
449eac7052fSMatt Macy * ==========================================================================
450eac7052fSMatt Macy * SPA TX Assign Histogram Routines
451eac7052fSMatt Macy * ==========================================================================
452eac7052fSMatt Macy */
453eac7052fSMatt Macy
454eac7052fSMatt Macy /*
455eac7052fSMatt Macy * Tx statistics - Information exported regarding dmu_tx_assign time.
456eac7052fSMatt Macy */
457eac7052fSMatt Macy
458eac7052fSMatt Macy /*
459eac7052fSMatt Macy * When the kstat is written zero all buckets. When the kstat is read
460eac7052fSMatt Macy * count the number of trailing buckets set to zero and update ks_ndata
461eac7052fSMatt Macy * such that they are not output.
462eac7052fSMatt Macy */
463eac7052fSMatt Macy static int
spa_tx_assign_update(kstat_t * ksp,int rw)464eac7052fSMatt Macy spa_tx_assign_update(kstat_t *ksp, int rw)
465eac7052fSMatt Macy {
466eac7052fSMatt Macy spa_t *spa = ksp->ks_private;
467eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
468eac7052fSMatt Macy int i;
469eac7052fSMatt Macy
470eac7052fSMatt Macy if (rw == KSTAT_WRITE) {
471eac7052fSMatt Macy for (i = 0; i < shk->count; i++)
472eac7052fSMatt Macy ((kstat_named_t *)shk->priv)[i].value.ui64 = 0;
473eac7052fSMatt Macy }
474eac7052fSMatt Macy
475eac7052fSMatt Macy for (i = shk->count; i > 0; i--)
476eac7052fSMatt Macy if (((kstat_named_t *)shk->priv)[i-1].value.ui64 != 0)
477eac7052fSMatt Macy break;
478eac7052fSMatt Macy
479eac7052fSMatt Macy ksp->ks_ndata = i;
480eac7052fSMatt Macy ksp->ks_data_size = i * sizeof (kstat_named_t);
481eac7052fSMatt Macy
482eac7052fSMatt Macy return (0);
483eac7052fSMatt Macy }
484eac7052fSMatt Macy
485eac7052fSMatt Macy static void
spa_tx_assign_init(spa_t * spa)486eac7052fSMatt Macy spa_tx_assign_init(spa_t *spa)
487eac7052fSMatt Macy {
488eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
489eac7052fSMatt Macy char *name;
490eac7052fSMatt Macy kstat_named_t *ks;
491eac7052fSMatt Macy kstat_t *ksp;
492eac7052fSMatt Macy int i;
493eac7052fSMatt Macy
494eac7052fSMatt Macy mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
495eac7052fSMatt Macy
496eac7052fSMatt Macy shk->count = 42; /* power of two buckets for 1ns to 2,199s */
497eac7052fSMatt Macy shk->size = shk->count * sizeof (kstat_named_t);
498eac7052fSMatt Macy shk->priv = kmem_alloc(shk->size, KM_SLEEP);
499eac7052fSMatt Macy
500eac7052fSMatt Macy name = kmem_asprintf("zfs/%s", spa_name(spa));
501eac7052fSMatt Macy
502eac7052fSMatt Macy for (i = 0; i < shk->count; i++) {
503eac7052fSMatt Macy ks = &((kstat_named_t *)shk->priv)[i];
504eac7052fSMatt Macy ks->data_type = KSTAT_DATA_UINT64;
505eac7052fSMatt Macy ks->value.ui64 = 0;
506eac7052fSMatt Macy (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
507eac7052fSMatt Macy (u_longlong_t)1 << i);
508eac7052fSMatt Macy }
509eac7052fSMatt Macy
510eac7052fSMatt Macy ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
511eac7052fSMatt Macy KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
512eac7052fSMatt Macy shk->kstat = ksp;
513eac7052fSMatt Macy
514eac7052fSMatt Macy if (ksp) {
515eac7052fSMatt Macy ksp->ks_lock = &shk->lock;
516eac7052fSMatt Macy ksp->ks_data = shk->priv;
517eac7052fSMatt Macy ksp->ks_ndata = shk->count;
518eac7052fSMatt Macy ksp->ks_data_size = shk->size;
519eac7052fSMatt Macy ksp->ks_private = spa;
520eac7052fSMatt Macy ksp->ks_update = spa_tx_assign_update;
521eac7052fSMatt Macy kstat_install(ksp);
522eac7052fSMatt Macy }
523eac7052fSMatt Macy kmem_strfree(name);
524eac7052fSMatt Macy }
525eac7052fSMatt Macy
526eac7052fSMatt Macy static void
spa_tx_assign_destroy(spa_t * spa)527eac7052fSMatt Macy spa_tx_assign_destroy(spa_t *spa)
528eac7052fSMatt Macy {
529eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
530eac7052fSMatt Macy kstat_t *ksp;
531eac7052fSMatt Macy
532eac7052fSMatt Macy ksp = shk->kstat;
533eac7052fSMatt Macy if (ksp)
534eac7052fSMatt Macy kstat_delete(ksp);
535eac7052fSMatt Macy
536eac7052fSMatt Macy kmem_free(shk->priv, shk->size);
537eac7052fSMatt Macy mutex_destroy(&shk->lock);
538eac7052fSMatt Macy }
539eac7052fSMatt Macy
540eac7052fSMatt Macy void
spa_tx_assign_add_nsecs(spa_t * spa,uint64_t nsecs)541eac7052fSMatt Macy spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
542eac7052fSMatt Macy {
543eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
544eac7052fSMatt Macy uint64_t idx = 0;
545eac7052fSMatt Macy
546eac7052fSMatt Macy while (((1ULL << idx) < nsecs) && (idx < shk->size - 1))
547eac7052fSMatt Macy idx++;
548eac7052fSMatt Macy
549eac7052fSMatt Macy atomic_inc_64(&((kstat_named_t *)shk->priv)[idx].value.ui64);
550eac7052fSMatt Macy }
551eac7052fSMatt Macy
552eac7052fSMatt Macy /*
553eac7052fSMatt Macy * ==========================================================================
554eac7052fSMatt Macy * SPA MMP History Routines
555eac7052fSMatt Macy * ==========================================================================
556eac7052fSMatt Macy */
557eac7052fSMatt Macy
558eac7052fSMatt Macy /*
559eac7052fSMatt Macy * MMP statistics - Information exported regarding attempted MMP writes
560eac7052fSMatt Macy * For MMP writes issued, fields used as per comments below.
561eac7052fSMatt Macy * For MMP writes skipped, an entry represents a span of time when
562eac7052fSMatt Macy * writes were skipped for same reason (error from mmp_random_leaf).
563eac7052fSMatt Macy * Differences are:
564eac7052fSMatt Macy * timestamp time first write skipped, if >1 skipped in a row
565eac7052fSMatt Macy * mmp_delay delay value at timestamp
566eac7052fSMatt Macy * vdev_guid number of writes skipped
567eac7052fSMatt Macy * io_error one of enum mmp_error
568eac7052fSMatt Macy * duration time span (ns) of skipped writes
569eac7052fSMatt Macy */
570eac7052fSMatt Macy
571eac7052fSMatt Macy typedef struct spa_mmp_history {
572eac7052fSMatt Macy uint64_t mmp_node_id; /* unique # for updates */
573eac7052fSMatt Macy uint64_t txg; /* txg of last sync */
574eac7052fSMatt Macy uint64_t timestamp; /* UTC time MMP write issued */
575eac7052fSMatt Macy uint64_t mmp_delay; /* mmp_thread.mmp_delay at timestamp */
576eac7052fSMatt Macy uint64_t vdev_guid; /* unique ID of leaf vdev */
577eac7052fSMatt Macy char *vdev_path;
578eac7052fSMatt Macy int vdev_label; /* vdev label */
579eac7052fSMatt Macy int io_error; /* error status of MMP write */
580eac7052fSMatt Macy hrtime_t error_start; /* hrtime of start of error period */
581eac7052fSMatt Macy hrtime_t duration; /* time from submission to completion */
582eac7052fSMatt Macy procfs_list_node_t smh_node;
583eac7052fSMatt Macy } spa_mmp_history_t;
584eac7052fSMatt Macy
/*
 * Print the column titles for the MMP history table.  Always returns 0.
 */
static int
spa_mmp_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
	    "%-10s %s\n",
	    "id", "txg", "timestamp", "error", "duration",
	    "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");

	return (0);
}
593eac7052fSMatt Macy
594eac7052fSMatt Macy static int
spa_mmp_history_show(struct seq_file * f,void * data)595eac7052fSMatt Macy spa_mmp_history_show(struct seq_file *f, void *data)
596eac7052fSMatt Macy {
597eac7052fSMatt Macy spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
598eac7052fSMatt Macy char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
599eac7052fSMatt Macy "%-10lld %s\n";
600eac7052fSMatt Macy char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
601eac7052fSMatt Macy "%-10lld %s\n";
602eac7052fSMatt Macy
603eac7052fSMatt Macy seq_printf(f, (smh->error_start ? skip_fmt : write_fmt),
604eac7052fSMatt Macy (u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg,
605eac7052fSMatt Macy (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
606eac7052fSMatt Macy (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
607eac7052fSMatt Macy (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
608eac7052fSMatt Macy (smh->vdev_path ? smh->vdev_path : "-"));
609eac7052fSMatt Macy
610eac7052fSMatt Macy return (0);
611eac7052fSMatt Macy }
612eac7052fSMatt Macy
613eac7052fSMatt Macy /* Remove oldest elements from list until there are no more than 'size' left */
614eac7052fSMatt Macy static void
spa_mmp_history_truncate(spa_history_list_t * shl,unsigned int size)615eac7052fSMatt Macy spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size)
616eac7052fSMatt Macy {
617eac7052fSMatt Macy spa_mmp_history_t *smh;
618eac7052fSMatt Macy while (shl->size > size) {
619eac7052fSMatt Macy smh = list_remove_head(&shl->procfs_list.pl_list);
620eac7052fSMatt Macy if (smh->vdev_path)
621eac7052fSMatt Macy kmem_strfree(smh->vdev_path);
622eac7052fSMatt Macy kmem_free(smh, sizeof (spa_mmp_history_t));
623eac7052fSMatt Macy shl->size--;
624eac7052fSMatt Macy }
625eac7052fSMatt Macy
626eac7052fSMatt Macy if (size == 0)
627eac7052fSMatt Macy ASSERT(list_is_empty(&shl->procfs_list.pl_list));
628eac7052fSMatt Macy
629eac7052fSMatt Macy }
630eac7052fSMatt Macy
631eac7052fSMatt Macy static int
spa_mmp_history_clear(procfs_list_t * procfs_list)632eac7052fSMatt Macy spa_mmp_history_clear(procfs_list_t *procfs_list)
633eac7052fSMatt Macy {
634eac7052fSMatt Macy spa_history_list_t *shl = procfs_list->pl_private;
635eac7052fSMatt Macy mutex_enter(&procfs_list->pl_lock);
636eac7052fSMatt Macy spa_mmp_history_truncate(shl, 0);
637eac7052fSMatt Macy mutex_exit(&procfs_list->pl_lock);
638eac7052fSMatt Macy return (0);
639eac7052fSMatt Macy }
640eac7052fSMatt Macy
641eac7052fSMatt Macy static void
spa_mmp_history_init(spa_t * spa)642eac7052fSMatt Macy spa_mmp_history_init(spa_t *spa)
643eac7052fSMatt Macy {
644eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
645eac7052fSMatt Macy
646eac7052fSMatt Macy shl->size = 0;
647eac7052fSMatt Macy
648eac7052fSMatt Macy shl->procfs_list.pl_private = shl;
649c40487d4SMatt Macy procfs_list_install("zfs",
650c40487d4SMatt Macy spa_name(spa),
651eac7052fSMatt Macy "multihost",
652eac7052fSMatt Macy 0644,
653eac7052fSMatt Macy &shl->procfs_list,
654eac7052fSMatt Macy spa_mmp_history_show,
655eac7052fSMatt Macy spa_mmp_history_show_header,
656eac7052fSMatt Macy spa_mmp_history_clear,
657eac7052fSMatt Macy offsetof(spa_mmp_history_t, smh_node));
658eac7052fSMatt Macy }
659eac7052fSMatt Macy
660eac7052fSMatt Macy static void
spa_mmp_history_destroy(spa_t * spa)661eac7052fSMatt Macy spa_mmp_history_destroy(spa_t *spa)
662eac7052fSMatt Macy {
663eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
664eac7052fSMatt Macy procfs_list_uninstall(&shl->procfs_list);
665eac7052fSMatt Macy spa_mmp_history_truncate(shl, 0);
666eac7052fSMatt Macy procfs_list_destroy(&shl->procfs_list);
667eac7052fSMatt Macy }
668eac7052fSMatt Macy
669eac7052fSMatt Macy /*
670eac7052fSMatt Macy * Set duration in existing "skip" record to how long we have waited for a leaf
671eac7052fSMatt Macy * vdev to become available.
672eac7052fSMatt Macy *
673eac7052fSMatt Macy * Important that we start search at the tail of the list where new
674eac7052fSMatt Macy * records are inserted, so this is normally an O(1) operation.
675eac7052fSMatt Macy */
676eac7052fSMatt Macy int
spa_mmp_history_set_skip(spa_t * spa,uint64_t mmp_node_id)677eac7052fSMatt Macy spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
678eac7052fSMatt Macy {
679eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
680eac7052fSMatt Macy spa_mmp_history_t *smh;
681eac7052fSMatt Macy int error = ENOENT;
682eac7052fSMatt Macy
683eac7052fSMatt Macy if (zfs_multihost_history == 0 && shl->size == 0)
684eac7052fSMatt Macy return (0);
685eac7052fSMatt Macy
686eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
687eac7052fSMatt Macy for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
688eac7052fSMatt Macy smh = list_prev(&shl->procfs_list.pl_list, smh)) {
689eac7052fSMatt Macy if (smh->mmp_node_id == mmp_node_id) {
690eac7052fSMatt Macy ASSERT3U(smh->io_error, !=, 0);
691eac7052fSMatt Macy smh->duration = gethrtime() - smh->error_start;
692eac7052fSMatt Macy smh->vdev_guid++;
693eac7052fSMatt Macy error = 0;
694eac7052fSMatt Macy break;
695eac7052fSMatt Macy }
696eac7052fSMatt Macy }
697eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
698eac7052fSMatt Macy
699eac7052fSMatt Macy return (error);
700eac7052fSMatt Macy }
701eac7052fSMatt Macy
702eac7052fSMatt Macy /*
703eac7052fSMatt Macy * Set MMP write duration and error status in existing record.
704eac7052fSMatt Macy * See comment re: search order above spa_mmp_history_set_skip().
705eac7052fSMatt Macy */
706eac7052fSMatt Macy int
spa_mmp_history_set(spa_t * spa,uint64_t mmp_node_id,int io_error,hrtime_t duration)707eac7052fSMatt Macy spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
708eac7052fSMatt Macy hrtime_t duration)
709eac7052fSMatt Macy {
710eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
711eac7052fSMatt Macy spa_mmp_history_t *smh;
712eac7052fSMatt Macy int error = ENOENT;
713eac7052fSMatt Macy
714eac7052fSMatt Macy if (zfs_multihost_history == 0 && shl->size == 0)
715eac7052fSMatt Macy return (0);
716eac7052fSMatt Macy
717eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
718eac7052fSMatt Macy for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
719eac7052fSMatt Macy smh = list_prev(&shl->procfs_list.pl_list, smh)) {
720eac7052fSMatt Macy if (smh->mmp_node_id == mmp_node_id) {
721eac7052fSMatt Macy ASSERT(smh->io_error == 0);
722eac7052fSMatt Macy smh->io_error = io_error;
723eac7052fSMatt Macy smh->duration = duration;
724eac7052fSMatt Macy error = 0;
725eac7052fSMatt Macy break;
726eac7052fSMatt Macy }
727eac7052fSMatt Macy }
728eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
729eac7052fSMatt Macy
730eac7052fSMatt Macy return (error);
731eac7052fSMatt Macy }
732eac7052fSMatt Macy
733eac7052fSMatt Macy /*
734eac7052fSMatt Macy * Add a new MMP historical record.
735eac7052fSMatt Macy * error == 0 : a write was issued.
736eac7052fSMatt Macy * error != 0 : a write was not issued because no leaves were found.
737eac7052fSMatt Macy */
738eac7052fSMatt Macy void
spa_mmp_history_add(spa_t * spa,uint64_t txg,uint64_t timestamp,uint64_t mmp_delay,vdev_t * vd,int label,uint64_t mmp_node_id,int error)739eac7052fSMatt Macy spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
740eac7052fSMatt Macy uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
741eac7052fSMatt Macy int error)
742eac7052fSMatt Macy {
743eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
744eac7052fSMatt Macy spa_mmp_history_t *smh;
745eac7052fSMatt Macy
746eac7052fSMatt Macy if (zfs_multihost_history == 0 && shl->size == 0)
747eac7052fSMatt Macy return;
748eac7052fSMatt Macy
749eac7052fSMatt Macy smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
750eac7052fSMatt Macy smh->txg = txg;
751eac7052fSMatt Macy smh->timestamp = timestamp;
752eac7052fSMatt Macy smh->mmp_delay = mmp_delay;
753eac7052fSMatt Macy if (vd) {
754eac7052fSMatt Macy smh->vdev_guid = vd->vdev_guid;
755eac7052fSMatt Macy if (vd->vdev_path)
756eac7052fSMatt Macy smh->vdev_path = kmem_strdup(vd->vdev_path);
757eac7052fSMatt Macy }
758eac7052fSMatt Macy smh->vdev_label = label;
759eac7052fSMatt Macy smh->mmp_node_id = mmp_node_id;
760eac7052fSMatt Macy
761eac7052fSMatt Macy if (error) {
762eac7052fSMatt Macy smh->io_error = error;
763eac7052fSMatt Macy smh->error_start = gethrtime();
764eac7052fSMatt Macy smh->vdev_guid = 1;
765eac7052fSMatt Macy }
766eac7052fSMatt Macy
767eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
768eac7052fSMatt Macy procfs_list_add(&shl->procfs_list, smh);
769eac7052fSMatt Macy shl->size++;
770eac7052fSMatt Macy spa_mmp_history_truncate(shl, zfs_multihost_history);
771eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
772eac7052fSMatt Macy }
773eac7052fSMatt Macy
774eac7052fSMatt Macy static void *
spa_state_addr(kstat_t * ksp,loff_t n)775eac7052fSMatt Macy spa_state_addr(kstat_t *ksp, loff_t n)
776eac7052fSMatt Macy {
777eac7052fSMatt Macy if (n == 0)
778eac7052fSMatt Macy return (ksp->ks_private); /* return the spa_t */
779eac7052fSMatt Macy return (NULL);
780eac7052fSMatt Macy }
781eac7052fSMatt Macy
782eac7052fSMatt Macy static int
spa_state_data(char * buf,size_t size,void * data)783eac7052fSMatt Macy spa_state_data(char *buf, size_t size, void *data)
784eac7052fSMatt Macy {
785eac7052fSMatt Macy spa_t *spa = (spa_t *)data;
786eac7052fSMatt Macy (void) snprintf(buf, size, "%s\n", spa_state_to_name(spa));
787eac7052fSMatt Macy return (0);
788eac7052fSMatt Macy }
789eac7052fSMatt Macy
790eac7052fSMatt Macy /*
791eac7052fSMatt Macy * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state.
792eac7052fSMatt Macy *
793eac7052fSMatt Macy * This is a lock-less read of the pool's state (unlike using 'zpool', which
794eac7052fSMatt Macy * can potentially block for seconds). Because it doesn't block, it can useful
795eac7052fSMatt Macy * as a pool heartbeat value.
796eac7052fSMatt Macy */
797eac7052fSMatt Macy static void
spa_state_init(spa_t * spa)798eac7052fSMatt Macy spa_state_init(spa_t *spa)
799eac7052fSMatt Macy {
800eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.state;
801eac7052fSMatt Macy char *name;
802eac7052fSMatt Macy kstat_t *ksp;
803eac7052fSMatt Macy
804eac7052fSMatt Macy mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
805eac7052fSMatt Macy
806eac7052fSMatt Macy name = kmem_asprintf("zfs/%s", spa_name(spa));
807eac7052fSMatt Macy ksp = kstat_create(name, 0, "state", "misc",
808eac7052fSMatt Macy KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
809eac7052fSMatt Macy
810eac7052fSMatt Macy shk->kstat = ksp;
811eac7052fSMatt Macy if (ksp) {
812eac7052fSMatt Macy ksp->ks_lock = &shk->lock;
813eac7052fSMatt Macy ksp->ks_data = NULL;
814eac7052fSMatt Macy ksp->ks_private = spa;
815eac7052fSMatt Macy ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
816eac7052fSMatt Macy kstat_set_raw_ops(ksp, NULL, spa_state_data, spa_state_addr);
817eac7052fSMatt Macy kstat_install(ksp);
818eac7052fSMatt Macy }
819eac7052fSMatt Macy
820eac7052fSMatt Macy kmem_strfree(name);
821eac7052fSMatt Macy }
822eac7052fSMatt Macy
823e3aa18adSMartin Matuska static int
spa_guid_data(char * buf,size_t size,void * data)824e3aa18adSMartin Matuska spa_guid_data(char *buf, size_t size, void *data)
825e3aa18adSMartin Matuska {
826e3aa18adSMartin Matuska spa_t *spa = (spa_t *)data;
827e3aa18adSMartin Matuska (void) snprintf(buf, size, "%llu\n", (u_longlong_t)spa_guid(spa));
828e3aa18adSMartin Matuska return (0);
829e3aa18adSMartin Matuska }
830e3aa18adSMartin Matuska
831e3aa18adSMartin Matuska static void
spa_guid_init(spa_t * spa)832e3aa18adSMartin Matuska spa_guid_init(spa_t *spa)
833e3aa18adSMartin Matuska {
834e3aa18adSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.guid;
835e3aa18adSMartin Matuska char *name;
836e3aa18adSMartin Matuska kstat_t *ksp;
837e3aa18adSMartin Matuska
838e3aa18adSMartin Matuska mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
839e3aa18adSMartin Matuska
840e3aa18adSMartin Matuska name = kmem_asprintf("zfs/%s", spa_name(spa));
841e3aa18adSMartin Matuska
842e3aa18adSMartin Matuska ksp = kstat_create(name, 0, "guid", "misc",
843e3aa18adSMartin Matuska KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
844e3aa18adSMartin Matuska
845e3aa18adSMartin Matuska shk->kstat = ksp;
846e3aa18adSMartin Matuska if (ksp) {
847e3aa18adSMartin Matuska ksp->ks_lock = &shk->lock;
848e3aa18adSMartin Matuska ksp->ks_data = NULL;
849e3aa18adSMartin Matuska ksp->ks_private = spa;
850e3aa18adSMartin Matuska ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
851e3aa18adSMartin Matuska kstat_set_raw_ops(ksp, NULL, spa_guid_data, spa_state_addr);
852e3aa18adSMartin Matuska kstat_install(ksp);
853e3aa18adSMartin Matuska }
854e3aa18adSMartin Matuska
855e3aa18adSMartin Matuska kmem_strfree(name);
856e3aa18adSMartin Matuska }
857e3aa18adSMartin Matuska
858eac7052fSMatt Macy static void
spa_health_destroy(spa_t * spa)859eac7052fSMatt Macy spa_health_destroy(spa_t *spa)
860eac7052fSMatt Macy {
861eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.state;
862eac7052fSMatt Macy kstat_t *ksp = shk->kstat;
863eac7052fSMatt Macy if (ksp)
864eac7052fSMatt Macy kstat_delete(ksp);
865eac7052fSMatt Macy
866eac7052fSMatt Macy mutex_destroy(&shk->lock);
867eac7052fSMatt Macy }
868eac7052fSMatt Macy
869e3aa18adSMartin Matuska static void
spa_guid_destroy(spa_t * spa)870e3aa18adSMartin Matuska spa_guid_destroy(spa_t *spa)
871e3aa18adSMartin Matuska {
872e3aa18adSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.guid;
873e3aa18adSMartin Matuska kstat_t *ksp = shk->kstat;
874e3aa18adSMartin Matuska if (ksp)
875e3aa18adSMartin Matuska kstat_delete(ksp);
876e3aa18adSMartin Matuska
877e3aa18adSMartin Matuska mutex_destroy(&shk->lock);
878e3aa18adSMartin Matuska }
879e3aa18adSMartin Matuska
880e92ffd9bSMartin Matuska static const spa_iostats_t spa_iostats_template = {
881eac7052fSMatt Macy { "trim_extents_written", KSTAT_DATA_UINT64 },
882eac7052fSMatt Macy { "trim_bytes_written", KSTAT_DATA_UINT64 },
883eac7052fSMatt Macy { "trim_extents_skipped", KSTAT_DATA_UINT64 },
884eac7052fSMatt Macy { "trim_bytes_skipped", KSTAT_DATA_UINT64 },
885eac7052fSMatt Macy { "trim_extents_failed", KSTAT_DATA_UINT64 },
886eac7052fSMatt Macy { "trim_bytes_failed", KSTAT_DATA_UINT64 },
887eac7052fSMatt Macy { "autotrim_extents_written", KSTAT_DATA_UINT64 },
888eac7052fSMatt Macy { "autotrim_bytes_written", KSTAT_DATA_UINT64 },
889eac7052fSMatt Macy { "autotrim_extents_skipped", KSTAT_DATA_UINT64 },
890eac7052fSMatt Macy { "autotrim_bytes_skipped", KSTAT_DATA_UINT64 },
891eac7052fSMatt Macy { "autotrim_extents_failed", KSTAT_DATA_UINT64 },
892eac7052fSMatt Macy { "autotrim_bytes_failed", KSTAT_DATA_UINT64 },
893eac7052fSMatt Macy { "simple_trim_extents_written", KSTAT_DATA_UINT64 },
894eac7052fSMatt Macy { "simple_trim_bytes_written", KSTAT_DATA_UINT64 },
895eac7052fSMatt Macy { "simple_trim_extents_skipped", KSTAT_DATA_UINT64 },
896eac7052fSMatt Macy { "simple_trim_bytes_skipped", KSTAT_DATA_UINT64 },
897eac7052fSMatt Macy { "simple_trim_extents_failed", KSTAT_DATA_UINT64 },
898eac7052fSMatt Macy { "simple_trim_bytes_failed", KSTAT_DATA_UINT64 },
8997a7741afSMartin Matuska { "arc_read_count", KSTAT_DATA_UINT64 },
9007a7741afSMartin Matuska { "arc_read_bytes", KSTAT_DATA_UINT64 },
9017a7741afSMartin Matuska { "arc_write_count", KSTAT_DATA_UINT64 },
9027a7741afSMartin Matuska { "arc_write_bytes", KSTAT_DATA_UINT64 },
9037a7741afSMartin Matuska { "direct_read_count", KSTAT_DATA_UINT64 },
9047a7741afSMartin Matuska { "direct_read_bytes", KSTAT_DATA_UINT64 },
9057a7741afSMartin Matuska { "direct_write_count", KSTAT_DATA_UINT64 },
9067a7741afSMartin Matuska { "direct_write_bytes", KSTAT_DATA_UINT64 },
907eac7052fSMatt Macy };
908eac7052fSMatt Macy
/*
 * Atomically add 'val' to the 64-bit counter of the spa_iostats_t field
 * named 'stat'.  A local pointer named 'iostats' must be in scope at the
 * point of use.
 *
 * The expansion is a single expression without a trailing semicolon, so the
 * macro behaves like an ordinary function call and remains valid as the
 * body of an unbraced if/else.
 */
#define	SPA_IOSTATS_ADD(stat, val) \
	atomic_add_64(&iostats->stat.value.ui64, (val))
911eac7052fSMatt Macy
912eac7052fSMatt Macy void
spa_iostats_trim_add(spa_t * spa,trim_type_t type,uint64_t extents_written,uint64_t bytes_written,uint64_t extents_skipped,uint64_t bytes_skipped,uint64_t extents_failed,uint64_t bytes_failed)913eac7052fSMatt Macy spa_iostats_trim_add(spa_t *spa, trim_type_t type,
914eac7052fSMatt Macy uint64_t extents_written, uint64_t bytes_written,
915eac7052fSMatt Macy uint64_t extents_skipped, uint64_t bytes_skipped,
916eac7052fSMatt Macy uint64_t extents_failed, uint64_t bytes_failed)
917eac7052fSMatt Macy {
918eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.iostats;
919eac7052fSMatt Macy kstat_t *ksp = shk->kstat;
920eac7052fSMatt Macy spa_iostats_t *iostats;
921eac7052fSMatt Macy
922eac7052fSMatt Macy if (ksp == NULL)
923eac7052fSMatt Macy return;
924eac7052fSMatt Macy
925eac7052fSMatt Macy iostats = ksp->ks_data;
926eac7052fSMatt Macy if (type == TRIM_TYPE_MANUAL) {
927eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_extents_written, extents_written);
928eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_bytes_written, bytes_written);
929eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_extents_skipped, extents_skipped);
930eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_bytes_skipped, bytes_skipped);
931eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_extents_failed, extents_failed);
932eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_bytes_failed, bytes_failed);
933eac7052fSMatt Macy } else if (type == TRIM_TYPE_AUTO) {
934eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_extents_written, extents_written);
935eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_bytes_written, bytes_written);
936eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_extents_skipped, extents_skipped);
937eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_bytes_skipped, bytes_skipped);
938eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_extents_failed, extents_failed);
939eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_bytes_failed, bytes_failed);
940eac7052fSMatt Macy } else {
941eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_extents_written, extents_written);
942eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_bytes_written, bytes_written);
943eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_extents_skipped, extents_skipped);
944eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_bytes_skipped, bytes_skipped);
945eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_extents_failed, extents_failed);
946eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_bytes_failed, bytes_failed);
947eac7052fSMatt Macy }
948eac7052fSMatt Macy }
949eac7052fSMatt Macy
9507a7741afSMartin Matuska void
spa_iostats_read_add(spa_t * spa,uint64_t size,uint64_t iops,dmu_flags_t flags)951*b1c1ee44SMartin Matuska spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops,
952*b1c1ee44SMartin Matuska dmu_flags_t flags)
9537a7741afSMartin Matuska {
9547a7741afSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.iostats;
9557a7741afSMartin Matuska kstat_t *ksp = shk->kstat;
9567a7741afSMartin Matuska
9577a7741afSMartin Matuska if (ksp == NULL)
9587a7741afSMartin Matuska return;
9597a7741afSMartin Matuska
9607a7741afSMartin Matuska spa_iostats_t *iostats = ksp->ks_data;
9617a7741afSMartin Matuska if (flags & DMU_DIRECTIO) {
9627a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_read_count, iops);
9637a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_read_bytes, size);
9647a7741afSMartin Matuska } else {
9657a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_read_count, iops);
9667a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_read_bytes, size);
9677a7741afSMartin Matuska }
9687a7741afSMartin Matuska }
9697a7741afSMartin Matuska
9707a7741afSMartin Matuska void
spa_iostats_write_add(spa_t * spa,uint64_t size,uint64_t iops,dmu_flags_t flags)971*b1c1ee44SMartin Matuska spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops,
972*b1c1ee44SMartin Matuska dmu_flags_t flags)
9737a7741afSMartin Matuska {
9747a7741afSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.iostats;
9757a7741afSMartin Matuska kstat_t *ksp = shk->kstat;
9767a7741afSMartin Matuska
9777a7741afSMartin Matuska if (ksp == NULL)
9787a7741afSMartin Matuska return;
9797a7741afSMartin Matuska
9807a7741afSMartin Matuska spa_iostats_t *iostats = ksp->ks_data;
9817a7741afSMartin Matuska if (flags & DMU_DIRECTIO) {
9827a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_write_count, iops);
9837a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_write_bytes, size);
9847a7741afSMartin Matuska } else {
9857a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_write_count, iops);
9867a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_write_bytes, size);
9877a7741afSMartin Matuska }
9887a7741afSMartin Matuska }
9897a7741afSMartin Matuska
990eac7052fSMatt Macy static int
spa_iostats_update(kstat_t * ksp,int rw)991eac7052fSMatt Macy spa_iostats_update(kstat_t *ksp, int rw)
992eac7052fSMatt Macy {
993eac7052fSMatt Macy if (rw == KSTAT_WRITE) {
994eac7052fSMatt Macy memcpy(ksp->ks_data, &spa_iostats_template,
995eac7052fSMatt Macy sizeof (spa_iostats_t));
996eac7052fSMatt Macy }
997eac7052fSMatt Macy
998eac7052fSMatt Macy return (0);
999eac7052fSMatt Macy }
1000eac7052fSMatt Macy
1001eac7052fSMatt Macy static void
spa_iostats_init(spa_t * spa)1002eac7052fSMatt Macy spa_iostats_init(spa_t *spa)
1003eac7052fSMatt Macy {
1004eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.iostats;
1005eac7052fSMatt Macy
1006eac7052fSMatt Macy mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
1007eac7052fSMatt Macy
1008eac7052fSMatt Macy char *name = kmem_asprintf("zfs/%s", spa_name(spa));
1009eac7052fSMatt Macy kstat_t *ksp = kstat_create(name, 0, "iostats", "misc",
1010eac7052fSMatt Macy KSTAT_TYPE_NAMED, sizeof (spa_iostats_t) / sizeof (kstat_named_t),
1011eac7052fSMatt Macy KSTAT_FLAG_VIRTUAL);
1012eac7052fSMatt Macy
1013eac7052fSMatt Macy shk->kstat = ksp;
1014eac7052fSMatt Macy if (ksp) {
1015eac7052fSMatt Macy int size = sizeof (spa_iostats_t);
1016eac7052fSMatt Macy ksp->ks_lock = &shk->lock;
1017eac7052fSMatt Macy ksp->ks_private = spa;
1018eac7052fSMatt Macy ksp->ks_update = spa_iostats_update;
1019eac7052fSMatt Macy ksp->ks_data = kmem_alloc(size, KM_SLEEP);
1020eac7052fSMatt Macy memcpy(ksp->ks_data, &spa_iostats_template, size);
1021eac7052fSMatt Macy kstat_install(ksp);
1022eac7052fSMatt Macy }
1023eac7052fSMatt Macy
1024eac7052fSMatt Macy kmem_strfree(name);
1025eac7052fSMatt Macy }
1026eac7052fSMatt Macy
1027eac7052fSMatt Macy static void
spa_iostats_destroy(spa_t * spa)1028eac7052fSMatt Macy spa_iostats_destroy(spa_t *spa)
1029eac7052fSMatt Macy {
1030eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.iostats;
1031eac7052fSMatt Macy kstat_t *ksp = shk->kstat;
1032eac7052fSMatt Macy if (ksp) {
1033eac7052fSMatt Macy kmem_free(ksp->ks_data, sizeof (spa_iostats_t));
1034eac7052fSMatt Macy kstat_delete(ksp);
1035eac7052fSMatt Macy }
1036eac7052fSMatt Macy
1037eac7052fSMatt Macy mutex_destroy(&shk->lock);
1038eac7052fSMatt Macy }
1039eac7052fSMatt Macy
1040eac7052fSMatt Macy void
spa_stats_init(spa_t * spa)1041eac7052fSMatt Macy spa_stats_init(spa_t *spa)
1042eac7052fSMatt Macy {
1043eac7052fSMatt Macy spa_read_history_init(spa);
1044eac7052fSMatt Macy spa_txg_history_init(spa);
1045eac7052fSMatt Macy spa_tx_assign_init(spa);
1046eac7052fSMatt Macy spa_mmp_history_init(spa);
1047eac7052fSMatt Macy spa_state_init(spa);
1048e3aa18adSMartin Matuska spa_guid_init(spa);
1049eac7052fSMatt Macy spa_iostats_init(spa);
1050eac7052fSMatt Macy }
1051eac7052fSMatt Macy
1052eac7052fSMatt Macy void
spa_stats_destroy(spa_t * spa)1053eac7052fSMatt Macy spa_stats_destroy(spa_t *spa)
1054eac7052fSMatt Macy {
1055eac7052fSMatt Macy spa_iostats_destroy(spa);
1056eac7052fSMatt Macy spa_health_destroy(spa);
1057eac7052fSMatt Macy spa_tx_assign_destroy(spa);
1058eac7052fSMatt Macy spa_txg_history_destroy(spa);
1059eac7052fSMatt Macy spa_read_history_destroy(spa);
1060eac7052fSMatt Macy spa_mmp_history_destroy(spa);
1061e3aa18adSMartin Matuska spa_guid_destroy(spa);
1062eac7052fSMatt Macy }
1063eac7052fSMatt Macy
1064be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs, zfs_, read_history, UINT, ZMOD_RW,
1065eac7052fSMatt Macy "Historical statistics for the last N reads");
1066eac7052fSMatt Macy
1067eac7052fSMatt Macy ZFS_MODULE_PARAM(zfs, zfs_, read_history_hits, INT, ZMOD_RW,
1068eac7052fSMatt Macy "Include cache hits in read history");
1069eac7052fSMatt Macy
1070be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs_txg, zfs_txg_, history, UINT, ZMOD_RW,
1071eac7052fSMatt Macy "Historical statistics for the last N txgs");
1072eac7052fSMatt Macy
1073be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs_multihost, zfs_multihost_, history, UINT, ZMOD_RW,
1074eac7052fSMatt Macy "Historical statistics for last N multihost writes");
1075