/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/spa.h>
#include <zfs_comutil.h>

28eac7052fSMatt Macy /*
29eac7052fSMatt Macy * Keeps stats on last N reads per spa_t, disabled by default.
30eac7052fSMatt Macy */
31be181ee2SMartin Matuska static uint_t zfs_read_history = B_FALSE;
32eac7052fSMatt Macy
33eac7052fSMatt Macy /*
34eac7052fSMatt Macy * Include cache hits in history, disabled by default.
35eac7052fSMatt Macy */
36e92ffd9bSMartin Matuska static int zfs_read_history_hits = B_FALSE;
37eac7052fSMatt Macy
38eac7052fSMatt Macy /*
39eac7052fSMatt Macy * Keeps stats on the last 100 txgs by default.
40eac7052fSMatt Macy */
41be181ee2SMartin Matuska static uint_t zfs_txg_history = 100;
42eac7052fSMatt Macy
43eac7052fSMatt Macy /*
44eac7052fSMatt Macy * Keeps stats on the last N MMP updates, disabled by default.
45eac7052fSMatt Macy */
46be181ee2SMartin Matuska static uint_t zfs_multihost_history = B_FALSE;
47eac7052fSMatt Macy
/*
 * ==========================================================================
 * SPA Read History Routines
 * ==========================================================================
 */

54eac7052fSMatt Macy /*
55eac7052fSMatt Macy * Read statistics - Information exported regarding each arc_read call
56eac7052fSMatt Macy */
57eac7052fSMatt Macy typedef struct spa_read_history {
58eac7052fSMatt Macy hrtime_t start; /* time read completed */
59eac7052fSMatt Macy uint64_t objset; /* read from this objset */
60eac7052fSMatt Macy uint64_t object; /* read of this object number */
61eac7052fSMatt Macy uint64_t level; /* block's indirection level */
62eac7052fSMatt Macy uint64_t blkid; /* read of this block id */
63eac7052fSMatt Macy char origin[24]; /* read originated from here */
64eac7052fSMatt Macy uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */
65eac7052fSMatt Macy pid_t pid; /* PID of task doing read */
66eac7052fSMatt Macy char comm[16]; /* process name of task doing read */
67eac7052fSMatt Macy procfs_list_node_t srh_node;
68eac7052fSMatt Macy } spa_read_history_t;
69eac7052fSMatt Macy
/*
 * Print the column headings for the read history list.  The widths match
 * the per-record format used by spa_read_history_show().  Always returns 0.
 */
static int
spa_read_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s "
	    "%-24s %-8s %-16s\n", "UID", "start", "objset", "object",
	    "level", "blkid", "aflags", "origin", "pid", "process");

	return (0);
}

80eac7052fSMatt Macy static int
spa_read_history_show(struct seq_file * f,void * data)81eac7052fSMatt Macy spa_read_history_show(struct seq_file *f, void *data)
82eac7052fSMatt Macy {
83eac7052fSMatt Macy spa_read_history_t *srh = (spa_read_history_t *)data;
84eac7052fSMatt Macy
85eac7052fSMatt Macy seq_printf(f, "%-8llu %-16llu 0x%-6llx "
86eac7052fSMatt Macy "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
87eac7052fSMatt Macy (u_longlong_t)srh->srh_node.pln_id, srh->start,
88eac7052fSMatt Macy (longlong_t)srh->objset, (longlong_t)srh->object,
89eac7052fSMatt Macy (longlong_t)srh->level, (longlong_t)srh->blkid,
90eac7052fSMatt Macy srh->aflags, srh->origin, srh->pid, srh->comm);
91eac7052fSMatt Macy
92eac7052fSMatt Macy return (0);
93eac7052fSMatt Macy }
94eac7052fSMatt Macy
95eac7052fSMatt Macy /* Remove oldest elements from list until there are no more than 'size' left */
96eac7052fSMatt Macy static void
spa_read_history_truncate(spa_history_list_t * shl,unsigned int size)97eac7052fSMatt Macy spa_read_history_truncate(spa_history_list_t *shl, unsigned int size)
98eac7052fSMatt Macy {
99eac7052fSMatt Macy spa_read_history_t *srh;
100eac7052fSMatt Macy while (shl->size > size) {
101eac7052fSMatt Macy srh = list_remove_head(&shl->procfs_list.pl_list);
102eac7052fSMatt Macy ASSERT3P(srh, !=, NULL);
103eac7052fSMatt Macy kmem_free(srh, sizeof (spa_read_history_t));
104eac7052fSMatt Macy shl->size--;
105eac7052fSMatt Macy }
106eac7052fSMatt Macy
107eac7052fSMatt Macy if (size == 0)
108eac7052fSMatt Macy ASSERT(list_is_empty(&shl->procfs_list.pl_list));
109eac7052fSMatt Macy }
110eac7052fSMatt Macy
111eac7052fSMatt Macy static int
spa_read_history_clear(procfs_list_t * procfs_list)112eac7052fSMatt Macy spa_read_history_clear(procfs_list_t *procfs_list)
113eac7052fSMatt Macy {
114eac7052fSMatt Macy spa_history_list_t *shl = procfs_list->pl_private;
115eac7052fSMatt Macy mutex_enter(&procfs_list->pl_lock);
116eac7052fSMatt Macy spa_read_history_truncate(shl, 0);
117eac7052fSMatt Macy mutex_exit(&procfs_list->pl_lock);
118eac7052fSMatt Macy return (0);
119eac7052fSMatt Macy }
120eac7052fSMatt Macy
121eac7052fSMatt Macy static void
spa_read_history_init(spa_t * spa)122eac7052fSMatt Macy spa_read_history_init(spa_t *spa)
123eac7052fSMatt Macy {
124eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.read_history;
125eac7052fSMatt Macy
126eac7052fSMatt Macy shl->size = 0;
127eac7052fSMatt Macy shl->procfs_list.pl_private = shl;
128c40487d4SMatt Macy procfs_list_install("zfs",
129c40487d4SMatt Macy spa_name(spa),
130eac7052fSMatt Macy "reads",
131eac7052fSMatt Macy 0600,
132eac7052fSMatt Macy &shl->procfs_list,
133eac7052fSMatt Macy spa_read_history_show,
134eac7052fSMatt Macy spa_read_history_show_header,
135eac7052fSMatt Macy spa_read_history_clear,
136eac7052fSMatt Macy offsetof(spa_read_history_t, srh_node));
137eac7052fSMatt Macy }
138eac7052fSMatt Macy
139eac7052fSMatt Macy static void
spa_read_history_destroy(spa_t * spa)140eac7052fSMatt Macy spa_read_history_destroy(spa_t *spa)
141eac7052fSMatt Macy {
142eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.read_history;
143eac7052fSMatt Macy procfs_list_uninstall(&shl->procfs_list);
144eac7052fSMatt Macy spa_read_history_truncate(shl, 0);
145eac7052fSMatt Macy procfs_list_destroy(&shl->procfs_list);
146eac7052fSMatt Macy }
147eac7052fSMatt Macy
148eac7052fSMatt Macy void
spa_read_history_add(spa_t * spa,const zbookmark_phys_t * zb,uint32_t aflags)149eac7052fSMatt Macy spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
150eac7052fSMatt Macy {
151eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.read_history;
152eac7052fSMatt Macy spa_read_history_t *srh;
153eac7052fSMatt Macy
154eac7052fSMatt Macy ASSERT3P(spa, !=, NULL);
155eac7052fSMatt Macy ASSERT3P(zb, !=, NULL);
156eac7052fSMatt Macy
157eac7052fSMatt Macy if (zfs_read_history == 0 && shl->size == 0)
158eac7052fSMatt Macy return;
159eac7052fSMatt Macy
160eac7052fSMatt Macy if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
161eac7052fSMatt Macy return;
162eac7052fSMatt Macy
163eac7052fSMatt Macy srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
164eac7052fSMatt Macy strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
165eac7052fSMatt Macy srh->start = gethrtime();
166eac7052fSMatt Macy srh->objset = zb->zb_objset;
167eac7052fSMatt Macy srh->object = zb->zb_object;
168eac7052fSMatt Macy srh->level = zb->zb_level;
169eac7052fSMatt Macy srh->blkid = zb->zb_blkid;
170eac7052fSMatt Macy srh->aflags = aflags;
171eac7052fSMatt Macy srh->pid = getpid();
172eac7052fSMatt Macy
173eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
174eac7052fSMatt Macy
175eac7052fSMatt Macy procfs_list_add(&shl->procfs_list, srh);
176eac7052fSMatt Macy shl->size++;
177eac7052fSMatt Macy
178eac7052fSMatt Macy spa_read_history_truncate(shl, zfs_read_history);
179eac7052fSMatt Macy
180eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
181eac7052fSMatt Macy }
182eac7052fSMatt Macy
/*
 * ==========================================================================
 * SPA TXG History Routines
 * ==========================================================================
 */

189eac7052fSMatt Macy /*
190eac7052fSMatt Macy * Txg statistics - Information exported regarding each txg sync
191eac7052fSMatt Macy */
192eac7052fSMatt Macy
193eac7052fSMatt Macy typedef struct spa_txg_history {
194eac7052fSMatt Macy uint64_t txg; /* txg id */
195eac7052fSMatt Macy txg_state_t state; /* active txg state */
196eac7052fSMatt Macy uint64_t nread; /* number of bytes read */
197eac7052fSMatt Macy uint64_t nwritten; /* number of bytes written */
198eac7052fSMatt Macy uint64_t reads; /* number of read operations */
199eac7052fSMatt Macy uint64_t writes; /* number of write operations */
200eac7052fSMatt Macy uint64_t ndirty; /* number of dirty bytes */
201eac7052fSMatt Macy hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */
202eac7052fSMatt Macy procfs_list_node_t sth_node;
203eac7052fSMatt Macy } spa_txg_history_t;
204eac7052fSMatt Macy
/*
 * Print the column headings for the txg history list.  The widths match
 * the per-record format used by spa_txg_history_show().  Always returns 0.
 */
static int
spa_txg_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s "
	    "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state",
	    "ndirty", "nread", "nwritten", "reads", "writes",
	    "otime", "qtime", "wtime", "stime");

	return (0);
}

215eac7052fSMatt Macy static int
spa_txg_history_show(struct seq_file * f,void * data)216eac7052fSMatt Macy spa_txg_history_show(struct seq_file *f, void *data)
217eac7052fSMatt Macy {
218eac7052fSMatt Macy spa_txg_history_t *sth = (spa_txg_history_t *)data;
219eac7052fSMatt Macy uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
220eac7052fSMatt Macy char state;
221eac7052fSMatt Macy
222eac7052fSMatt Macy switch (sth->state) {
223eac7052fSMatt Macy case TXG_STATE_BIRTH: state = 'B'; break;
224eac7052fSMatt Macy case TXG_STATE_OPEN: state = 'O'; break;
225eac7052fSMatt Macy case TXG_STATE_QUIESCED: state = 'Q'; break;
226eac7052fSMatt Macy case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break;
227eac7052fSMatt Macy case TXG_STATE_SYNCED: state = 'S'; break;
228eac7052fSMatt Macy case TXG_STATE_COMMITTED: state = 'C'; break;
229eac7052fSMatt Macy default: state = '?'; break;
230eac7052fSMatt Macy }
231eac7052fSMatt Macy
232eac7052fSMatt Macy if (sth->times[TXG_STATE_OPEN])
233eac7052fSMatt Macy open = sth->times[TXG_STATE_OPEN] -
234eac7052fSMatt Macy sth->times[TXG_STATE_BIRTH];
235eac7052fSMatt Macy
236eac7052fSMatt Macy if (sth->times[TXG_STATE_QUIESCED])
237eac7052fSMatt Macy quiesce = sth->times[TXG_STATE_QUIESCED] -
238eac7052fSMatt Macy sth->times[TXG_STATE_OPEN];
239eac7052fSMatt Macy
240eac7052fSMatt Macy if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
241eac7052fSMatt Macy wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
242eac7052fSMatt Macy sth->times[TXG_STATE_QUIESCED];
243eac7052fSMatt Macy
244eac7052fSMatt Macy if (sth->times[TXG_STATE_SYNCED])
245eac7052fSMatt Macy sync = sth->times[TXG_STATE_SYNCED] -
246eac7052fSMatt Macy sth->times[TXG_STATE_WAIT_FOR_SYNC];
247eac7052fSMatt Macy
248eac7052fSMatt Macy seq_printf(f, "%-8llu %-16llu %-5c %-12llu "
249eac7052fSMatt Macy "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
250eac7052fSMatt Macy (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
251eac7052fSMatt Macy (u_longlong_t)sth->ndirty,
252eac7052fSMatt Macy (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
253eac7052fSMatt Macy (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
254eac7052fSMatt Macy (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
255eac7052fSMatt Macy (u_longlong_t)sync);
256eac7052fSMatt Macy
257eac7052fSMatt Macy return (0);
258eac7052fSMatt Macy }
259eac7052fSMatt Macy
260eac7052fSMatt Macy /* Remove oldest elements from list until there are no more than 'size' left */
261eac7052fSMatt Macy static void
spa_txg_history_truncate(spa_history_list_t * shl,unsigned int size)262eac7052fSMatt Macy spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size)
263eac7052fSMatt Macy {
264eac7052fSMatt Macy spa_txg_history_t *sth;
265eac7052fSMatt Macy while (shl->size > size) {
266eac7052fSMatt Macy sth = list_remove_head(&shl->procfs_list.pl_list);
267eac7052fSMatt Macy ASSERT3P(sth, !=, NULL);
268eac7052fSMatt Macy kmem_free(sth, sizeof (spa_txg_history_t));
269eac7052fSMatt Macy shl->size--;
270eac7052fSMatt Macy }
271eac7052fSMatt Macy
272eac7052fSMatt Macy if (size == 0)
273eac7052fSMatt Macy ASSERT(list_is_empty(&shl->procfs_list.pl_list));
274eac7052fSMatt Macy
275eac7052fSMatt Macy }
276eac7052fSMatt Macy
277eac7052fSMatt Macy static int
spa_txg_history_clear(procfs_list_t * procfs_list)278eac7052fSMatt Macy spa_txg_history_clear(procfs_list_t *procfs_list)
279eac7052fSMatt Macy {
280eac7052fSMatt Macy spa_history_list_t *shl = procfs_list->pl_private;
281eac7052fSMatt Macy mutex_enter(&procfs_list->pl_lock);
282eac7052fSMatt Macy spa_txg_history_truncate(shl, 0);
283eac7052fSMatt Macy mutex_exit(&procfs_list->pl_lock);
284eac7052fSMatt Macy return (0);
285eac7052fSMatt Macy }
286eac7052fSMatt Macy
287eac7052fSMatt Macy static void
spa_txg_history_init(spa_t * spa)288eac7052fSMatt Macy spa_txg_history_init(spa_t *spa)
289eac7052fSMatt Macy {
290eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
291eac7052fSMatt Macy
292eac7052fSMatt Macy shl->size = 0;
293eac7052fSMatt Macy shl->procfs_list.pl_private = shl;
294c40487d4SMatt Macy procfs_list_install("zfs",
295c40487d4SMatt Macy spa_name(spa),
296eac7052fSMatt Macy "txgs",
297eac7052fSMatt Macy 0644,
298eac7052fSMatt Macy &shl->procfs_list,
299eac7052fSMatt Macy spa_txg_history_show,
300eac7052fSMatt Macy spa_txg_history_show_header,
301eac7052fSMatt Macy spa_txg_history_clear,
302eac7052fSMatt Macy offsetof(spa_txg_history_t, sth_node));
303eac7052fSMatt Macy }
304eac7052fSMatt Macy
305eac7052fSMatt Macy static void
spa_txg_history_destroy(spa_t * spa)306eac7052fSMatt Macy spa_txg_history_destroy(spa_t *spa)
307eac7052fSMatt Macy {
308eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
309eac7052fSMatt Macy procfs_list_uninstall(&shl->procfs_list);
310eac7052fSMatt Macy spa_txg_history_truncate(shl, 0);
311eac7052fSMatt Macy procfs_list_destroy(&shl->procfs_list);
312eac7052fSMatt Macy }
313eac7052fSMatt Macy
314eac7052fSMatt Macy /*
315eac7052fSMatt Macy * Add a new txg to historical record.
316eac7052fSMatt Macy */
317eac7052fSMatt Macy void
spa_txg_history_add(spa_t * spa,uint64_t txg,hrtime_t birth_time)318eac7052fSMatt Macy spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
319eac7052fSMatt Macy {
320eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
321eac7052fSMatt Macy spa_txg_history_t *sth;
322eac7052fSMatt Macy
323eac7052fSMatt Macy if (zfs_txg_history == 0 && shl->size == 0)
324eac7052fSMatt Macy return;
325eac7052fSMatt Macy
326eac7052fSMatt Macy sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
327eac7052fSMatt Macy sth->txg = txg;
328eac7052fSMatt Macy sth->state = TXG_STATE_OPEN;
329eac7052fSMatt Macy sth->times[TXG_STATE_BIRTH] = birth_time;
330eac7052fSMatt Macy
331eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
332eac7052fSMatt Macy procfs_list_add(&shl->procfs_list, sth);
333eac7052fSMatt Macy shl->size++;
334eac7052fSMatt Macy spa_txg_history_truncate(shl, zfs_txg_history);
335eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
336eac7052fSMatt Macy }
337eac7052fSMatt Macy
338eac7052fSMatt Macy /*
339eac7052fSMatt Macy * Set txg state completion time and increment current state.
340eac7052fSMatt Macy */
341eac7052fSMatt Macy int
spa_txg_history_set(spa_t * spa,uint64_t txg,txg_state_t completed_state,hrtime_t completed_time)342eac7052fSMatt Macy spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
343eac7052fSMatt Macy hrtime_t completed_time)
344eac7052fSMatt Macy {
345eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
346eac7052fSMatt Macy spa_txg_history_t *sth;
347eac7052fSMatt Macy int error = ENOENT;
348eac7052fSMatt Macy
349eac7052fSMatt Macy if (zfs_txg_history == 0)
350eac7052fSMatt Macy return (0);
351eac7052fSMatt Macy
352eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
353eac7052fSMatt Macy for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
354eac7052fSMatt Macy sth = list_prev(&shl->procfs_list.pl_list, sth)) {
355eac7052fSMatt Macy if (sth->txg == txg) {
356eac7052fSMatt Macy sth->times[completed_state] = completed_time;
357eac7052fSMatt Macy sth->state++;
358eac7052fSMatt Macy error = 0;
359eac7052fSMatt Macy break;
360eac7052fSMatt Macy }
361eac7052fSMatt Macy }
362eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
363eac7052fSMatt Macy
364eac7052fSMatt Macy return (error);
365eac7052fSMatt Macy }
366eac7052fSMatt Macy
367eac7052fSMatt Macy /*
368eac7052fSMatt Macy * Set txg IO stats.
369eac7052fSMatt Macy */
370eac7052fSMatt Macy static int
spa_txg_history_set_io(spa_t * spa,uint64_t txg,uint64_t nread,uint64_t nwritten,uint64_t reads,uint64_t writes,uint64_t ndirty)371eac7052fSMatt Macy spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
372eac7052fSMatt Macy uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
373eac7052fSMatt Macy {
374eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.txg_history;
375eac7052fSMatt Macy spa_txg_history_t *sth;
376eac7052fSMatt Macy int error = ENOENT;
377eac7052fSMatt Macy
378eac7052fSMatt Macy if (zfs_txg_history == 0)
379eac7052fSMatt Macy return (0);
380eac7052fSMatt Macy
381eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
382eac7052fSMatt Macy for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL;
383eac7052fSMatt Macy sth = list_prev(&shl->procfs_list.pl_list, sth)) {
384eac7052fSMatt Macy if (sth->txg == txg) {
385eac7052fSMatt Macy sth->nread = nread;
386eac7052fSMatt Macy sth->nwritten = nwritten;
387eac7052fSMatt Macy sth->reads = reads;
388eac7052fSMatt Macy sth->writes = writes;
389eac7052fSMatt Macy sth->ndirty = ndirty;
390eac7052fSMatt Macy error = 0;
391eac7052fSMatt Macy break;
392eac7052fSMatt Macy }
393eac7052fSMatt Macy }
394eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
395eac7052fSMatt Macy
396eac7052fSMatt Macy return (error);
397eac7052fSMatt Macy }
398eac7052fSMatt Macy
399eac7052fSMatt Macy txg_stat_t *
spa_txg_history_init_io(spa_t * spa,uint64_t txg,dsl_pool_t * dp)400eac7052fSMatt Macy spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
401eac7052fSMatt Macy {
402eac7052fSMatt Macy txg_stat_t *ts;
403eac7052fSMatt Macy
404eac7052fSMatt Macy if (zfs_txg_history == 0)
405eac7052fSMatt Macy return (NULL);
406eac7052fSMatt Macy
407eac7052fSMatt Macy ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP);
408eac7052fSMatt Macy
409eac7052fSMatt Macy spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
410eac7052fSMatt Macy vdev_get_stats(spa->spa_root_vdev, &ts->vs1);
411eac7052fSMatt Macy spa_config_exit(spa, SCL_CONFIG, FTAG);
412eac7052fSMatt Macy
413eac7052fSMatt Macy ts->txg = txg;
414eac7052fSMatt Macy ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
415eac7052fSMatt Macy
416eac7052fSMatt Macy spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime());
417eac7052fSMatt Macy
418eac7052fSMatt Macy return (ts);
419eac7052fSMatt Macy }
420eac7052fSMatt Macy
421eac7052fSMatt Macy void
spa_txg_history_fini_io(spa_t * spa,txg_stat_t * ts)422eac7052fSMatt Macy spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
423eac7052fSMatt Macy {
424eac7052fSMatt Macy if (ts == NULL)
425eac7052fSMatt Macy return;
426eac7052fSMatt Macy
427eac7052fSMatt Macy if (zfs_txg_history == 0) {
428eac7052fSMatt Macy kmem_free(ts, sizeof (txg_stat_t));
429eac7052fSMatt Macy return;
430eac7052fSMatt Macy }
431eac7052fSMatt Macy
432eac7052fSMatt Macy spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
433eac7052fSMatt Macy vdev_get_stats(spa->spa_root_vdev, &ts->vs2);
434eac7052fSMatt Macy spa_config_exit(spa, SCL_CONFIG, FTAG);
435eac7052fSMatt Macy
436eac7052fSMatt Macy spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime());
437eac7052fSMatt Macy spa_txg_history_set_io(spa, ts->txg,
438eac7052fSMatt Macy ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ],
439eac7052fSMatt Macy ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE],
440eac7052fSMatt Macy ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ],
441eac7052fSMatt Macy ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE],
442eac7052fSMatt Macy ts->ndirty);
443eac7052fSMatt Macy
444eac7052fSMatt Macy kmem_free(ts, sizeof (txg_stat_t));
445eac7052fSMatt Macy }
446eac7052fSMatt Macy
/*
 * ==========================================================================
 * SPA TX Assign Histogram Routines
 * ==========================================================================
 */

453eac7052fSMatt Macy /*
454eac7052fSMatt Macy * Tx statistics - Information exported regarding dmu_tx_assign time.
455eac7052fSMatt Macy */
456eac7052fSMatt Macy
457eac7052fSMatt Macy /*
458eac7052fSMatt Macy * When the kstat is written zero all buckets. When the kstat is read
459eac7052fSMatt Macy * count the number of trailing buckets set to zero and update ks_ndata
460eac7052fSMatt Macy * such that they are not output.
461eac7052fSMatt Macy */
462eac7052fSMatt Macy static int
spa_tx_assign_update(kstat_t * ksp,int rw)463eac7052fSMatt Macy spa_tx_assign_update(kstat_t *ksp, int rw)
464eac7052fSMatt Macy {
465eac7052fSMatt Macy spa_t *spa = ksp->ks_private;
466eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
467eac7052fSMatt Macy int i;
468eac7052fSMatt Macy
469eac7052fSMatt Macy if (rw == KSTAT_WRITE) {
470eac7052fSMatt Macy for (i = 0; i < shk->count; i++)
471eac7052fSMatt Macy ((kstat_named_t *)shk->priv)[i].value.ui64 = 0;
472eac7052fSMatt Macy }
473eac7052fSMatt Macy
474eac7052fSMatt Macy for (i = shk->count; i > 0; i--)
475eac7052fSMatt Macy if (((kstat_named_t *)shk->priv)[i-1].value.ui64 != 0)
476eac7052fSMatt Macy break;
477eac7052fSMatt Macy
478eac7052fSMatt Macy ksp->ks_ndata = i;
479eac7052fSMatt Macy ksp->ks_data_size = i * sizeof (kstat_named_t);
480eac7052fSMatt Macy
481eac7052fSMatt Macy return (0);
482eac7052fSMatt Macy }
483eac7052fSMatt Macy
484eac7052fSMatt Macy static void
spa_tx_assign_init(spa_t * spa)485eac7052fSMatt Macy spa_tx_assign_init(spa_t *spa)
486eac7052fSMatt Macy {
487eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
488eac7052fSMatt Macy char *name;
489eac7052fSMatt Macy kstat_named_t *ks;
490eac7052fSMatt Macy kstat_t *ksp;
491eac7052fSMatt Macy int i;
492eac7052fSMatt Macy
493eac7052fSMatt Macy mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
494eac7052fSMatt Macy
495eac7052fSMatt Macy shk->count = 42; /* power of two buckets for 1ns to 2,199s */
496eac7052fSMatt Macy shk->size = shk->count * sizeof (kstat_named_t);
497eac7052fSMatt Macy shk->priv = kmem_alloc(shk->size, KM_SLEEP);
498eac7052fSMatt Macy
499eac7052fSMatt Macy name = kmem_asprintf("zfs/%s", spa_name(spa));
500eac7052fSMatt Macy
501eac7052fSMatt Macy for (i = 0; i < shk->count; i++) {
502eac7052fSMatt Macy ks = &((kstat_named_t *)shk->priv)[i];
503eac7052fSMatt Macy ks->data_type = KSTAT_DATA_UINT64;
504eac7052fSMatt Macy ks->value.ui64 = 0;
505eac7052fSMatt Macy (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
506eac7052fSMatt Macy (u_longlong_t)1 << i);
507eac7052fSMatt Macy }
508eac7052fSMatt Macy
509eac7052fSMatt Macy ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
510eac7052fSMatt Macy KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
511eac7052fSMatt Macy shk->kstat = ksp;
512eac7052fSMatt Macy
513eac7052fSMatt Macy if (ksp) {
514eac7052fSMatt Macy ksp->ks_lock = &shk->lock;
515eac7052fSMatt Macy ksp->ks_data = shk->priv;
516eac7052fSMatt Macy ksp->ks_ndata = shk->count;
517eac7052fSMatt Macy ksp->ks_data_size = shk->size;
518eac7052fSMatt Macy ksp->ks_private = spa;
519eac7052fSMatt Macy ksp->ks_update = spa_tx_assign_update;
520eac7052fSMatt Macy kstat_install(ksp);
521eac7052fSMatt Macy }
522eac7052fSMatt Macy kmem_strfree(name);
523eac7052fSMatt Macy }
524eac7052fSMatt Macy
525eac7052fSMatt Macy static void
spa_tx_assign_destroy(spa_t * spa)526eac7052fSMatt Macy spa_tx_assign_destroy(spa_t *spa)
527eac7052fSMatt Macy {
528eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
529eac7052fSMatt Macy kstat_t *ksp;
530eac7052fSMatt Macy
531eac7052fSMatt Macy ksp = shk->kstat;
532eac7052fSMatt Macy if (ksp)
533eac7052fSMatt Macy kstat_delete(ksp);
534eac7052fSMatt Macy
535eac7052fSMatt Macy kmem_free(shk->priv, shk->size);
536eac7052fSMatt Macy mutex_destroy(&shk->lock);
537eac7052fSMatt Macy }
538eac7052fSMatt Macy
539eac7052fSMatt Macy void
spa_tx_assign_add_nsecs(spa_t * spa,uint64_t nsecs)540eac7052fSMatt Macy spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
541eac7052fSMatt Macy {
542eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram;
543eac7052fSMatt Macy uint64_t idx = 0;
544eac7052fSMatt Macy
545eac7052fSMatt Macy while (((1ULL << idx) < nsecs) && (idx < shk->size - 1))
546eac7052fSMatt Macy idx++;
547eac7052fSMatt Macy
548eac7052fSMatt Macy atomic_inc_64(&((kstat_named_t *)shk->priv)[idx].value.ui64);
549eac7052fSMatt Macy }
550eac7052fSMatt Macy
/*
 * ==========================================================================
 * SPA MMP History Routines
 * ==========================================================================
 */

557eac7052fSMatt Macy /*
558eac7052fSMatt Macy * MMP statistics - Information exported regarding attempted MMP writes
559eac7052fSMatt Macy * For MMP writes issued, fields used as per comments below.
560eac7052fSMatt Macy * For MMP writes skipped, an entry represents a span of time when
561eac7052fSMatt Macy * writes were skipped for same reason (error from mmp_random_leaf).
562eac7052fSMatt Macy * Differences are:
563eac7052fSMatt Macy * timestamp time first write skipped, if >1 skipped in a row
564eac7052fSMatt Macy * mmp_delay delay value at timestamp
565eac7052fSMatt Macy * vdev_guid number of writes skipped
566eac7052fSMatt Macy * io_error one of enum mmp_error
567eac7052fSMatt Macy * duration time span (ns) of skipped writes
568eac7052fSMatt Macy */
569eac7052fSMatt Macy
570eac7052fSMatt Macy typedef struct spa_mmp_history {
571eac7052fSMatt Macy uint64_t mmp_node_id; /* unique # for updates */
572eac7052fSMatt Macy uint64_t txg; /* txg of last sync */
573eac7052fSMatt Macy uint64_t timestamp; /* UTC time MMP write issued */
574eac7052fSMatt Macy uint64_t mmp_delay; /* mmp_thread.mmp_delay at timestamp */
575eac7052fSMatt Macy uint64_t vdev_guid; /* unique ID of leaf vdev */
576eac7052fSMatt Macy char *vdev_path;
577eac7052fSMatt Macy int vdev_label; /* vdev label */
578eac7052fSMatt Macy int io_error; /* error status of MMP write */
579eac7052fSMatt Macy hrtime_t error_start; /* hrtime of start of error period */
580eac7052fSMatt Macy hrtime_t duration; /* time from submission to completion */
581eac7052fSMatt Macy procfs_list_node_t smh_node;
582eac7052fSMatt Macy } spa_mmp_history_t;
583eac7052fSMatt Macy
/*
 * Print the column headings for the MMP history list.  Always returns 0.
 */
static int
spa_mmp_history_show_header(struct seq_file *f)
{
	seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
	    "%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
	    "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");

	return (0);
}

593eac7052fSMatt Macy static int
spa_mmp_history_show(struct seq_file * f,void * data)594eac7052fSMatt Macy spa_mmp_history_show(struct seq_file *f, void *data)
595eac7052fSMatt Macy {
596eac7052fSMatt Macy spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
597eac7052fSMatt Macy char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
598eac7052fSMatt Macy "%-10lld %s\n";
599eac7052fSMatt Macy char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
600eac7052fSMatt Macy "%-10lld %s\n";
601eac7052fSMatt Macy
602eac7052fSMatt Macy seq_printf(f, (smh->error_start ? skip_fmt : write_fmt),
603eac7052fSMatt Macy (u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg,
604eac7052fSMatt Macy (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
605eac7052fSMatt Macy (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
606eac7052fSMatt Macy (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
607eac7052fSMatt Macy (smh->vdev_path ? smh->vdev_path : "-"));
608eac7052fSMatt Macy
609eac7052fSMatt Macy return (0);
610eac7052fSMatt Macy }
611eac7052fSMatt Macy
612eac7052fSMatt Macy /* Remove oldest elements from list until there are no more than 'size' left */
613eac7052fSMatt Macy static void
spa_mmp_history_truncate(spa_history_list_t * shl,unsigned int size)614eac7052fSMatt Macy spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size)
615eac7052fSMatt Macy {
616eac7052fSMatt Macy spa_mmp_history_t *smh;
617eac7052fSMatt Macy while (shl->size > size) {
618eac7052fSMatt Macy smh = list_remove_head(&shl->procfs_list.pl_list);
619eac7052fSMatt Macy if (smh->vdev_path)
620eac7052fSMatt Macy kmem_strfree(smh->vdev_path);
621eac7052fSMatt Macy kmem_free(smh, sizeof (spa_mmp_history_t));
622eac7052fSMatt Macy shl->size--;
623eac7052fSMatt Macy }
624eac7052fSMatt Macy
625eac7052fSMatt Macy if (size == 0)
626eac7052fSMatt Macy ASSERT(list_is_empty(&shl->procfs_list.pl_list));
627eac7052fSMatt Macy
628eac7052fSMatt Macy }
629eac7052fSMatt Macy
630eac7052fSMatt Macy static int
spa_mmp_history_clear(procfs_list_t * procfs_list)631eac7052fSMatt Macy spa_mmp_history_clear(procfs_list_t *procfs_list)
632eac7052fSMatt Macy {
633eac7052fSMatt Macy spa_history_list_t *shl = procfs_list->pl_private;
634eac7052fSMatt Macy mutex_enter(&procfs_list->pl_lock);
635eac7052fSMatt Macy spa_mmp_history_truncate(shl, 0);
636eac7052fSMatt Macy mutex_exit(&procfs_list->pl_lock);
637eac7052fSMatt Macy return (0);
638eac7052fSMatt Macy }
639eac7052fSMatt Macy
640eac7052fSMatt Macy static void
spa_mmp_history_init(spa_t * spa)641eac7052fSMatt Macy spa_mmp_history_init(spa_t *spa)
642eac7052fSMatt Macy {
643eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
644eac7052fSMatt Macy
645eac7052fSMatt Macy shl->size = 0;
646eac7052fSMatt Macy
647eac7052fSMatt Macy shl->procfs_list.pl_private = shl;
648c40487d4SMatt Macy procfs_list_install("zfs",
649c40487d4SMatt Macy spa_name(spa),
650eac7052fSMatt Macy "multihost",
651eac7052fSMatt Macy 0644,
652eac7052fSMatt Macy &shl->procfs_list,
653eac7052fSMatt Macy spa_mmp_history_show,
654eac7052fSMatt Macy spa_mmp_history_show_header,
655eac7052fSMatt Macy spa_mmp_history_clear,
656eac7052fSMatt Macy offsetof(spa_mmp_history_t, smh_node));
657eac7052fSMatt Macy }
658eac7052fSMatt Macy
659eac7052fSMatt Macy static void
spa_mmp_history_destroy(spa_t * spa)660eac7052fSMatt Macy spa_mmp_history_destroy(spa_t *spa)
661eac7052fSMatt Macy {
662eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
663eac7052fSMatt Macy procfs_list_uninstall(&shl->procfs_list);
664eac7052fSMatt Macy spa_mmp_history_truncate(shl, 0);
665eac7052fSMatt Macy procfs_list_destroy(&shl->procfs_list);
666eac7052fSMatt Macy }
667eac7052fSMatt Macy
668eac7052fSMatt Macy /*
669eac7052fSMatt Macy * Set duration in existing "skip" record to how long we have waited for a leaf
670eac7052fSMatt Macy * vdev to become available.
671eac7052fSMatt Macy *
672eac7052fSMatt Macy * Important that we start search at the tail of the list where new
673eac7052fSMatt Macy * records are inserted, so this is normally an O(1) operation.
674eac7052fSMatt Macy */
675eac7052fSMatt Macy int
spa_mmp_history_set_skip(spa_t * spa,uint64_t mmp_node_id)676eac7052fSMatt Macy spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
677eac7052fSMatt Macy {
678eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
679eac7052fSMatt Macy spa_mmp_history_t *smh;
680eac7052fSMatt Macy int error = ENOENT;
681eac7052fSMatt Macy
682eac7052fSMatt Macy if (zfs_multihost_history == 0 && shl->size == 0)
683eac7052fSMatt Macy return (0);
684eac7052fSMatt Macy
685eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
686eac7052fSMatt Macy for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
687eac7052fSMatt Macy smh = list_prev(&shl->procfs_list.pl_list, smh)) {
688eac7052fSMatt Macy if (smh->mmp_node_id == mmp_node_id) {
689eac7052fSMatt Macy ASSERT3U(smh->io_error, !=, 0);
690eac7052fSMatt Macy smh->duration = gethrtime() - smh->error_start;
691eac7052fSMatt Macy smh->vdev_guid++;
692eac7052fSMatt Macy error = 0;
693eac7052fSMatt Macy break;
694eac7052fSMatt Macy }
695eac7052fSMatt Macy }
696eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
697eac7052fSMatt Macy
698eac7052fSMatt Macy return (error);
699eac7052fSMatt Macy }
700eac7052fSMatt Macy
701eac7052fSMatt Macy /*
702eac7052fSMatt Macy * Set MMP write duration and error status in existing record.
703eac7052fSMatt Macy * See comment re: search order above spa_mmp_history_set_skip().
704eac7052fSMatt Macy */
705eac7052fSMatt Macy int
spa_mmp_history_set(spa_t * spa,uint64_t mmp_node_id,int io_error,hrtime_t duration)706eac7052fSMatt Macy spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
707eac7052fSMatt Macy hrtime_t duration)
708eac7052fSMatt Macy {
709eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
710eac7052fSMatt Macy spa_mmp_history_t *smh;
711eac7052fSMatt Macy int error = ENOENT;
712eac7052fSMatt Macy
713eac7052fSMatt Macy if (zfs_multihost_history == 0 && shl->size == 0)
714eac7052fSMatt Macy return (0);
715eac7052fSMatt Macy
716eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
717eac7052fSMatt Macy for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
718eac7052fSMatt Macy smh = list_prev(&shl->procfs_list.pl_list, smh)) {
719eac7052fSMatt Macy if (smh->mmp_node_id == mmp_node_id) {
720eac7052fSMatt Macy ASSERT(smh->io_error == 0);
721eac7052fSMatt Macy smh->io_error = io_error;
722eac7052fSMatt Macy smh->duration = duration;
723eac7052fSMatt Macy error = 0;
724eac7052fSMatt Macy break;
725eac7052fSMatt Macy }
726eac7052fSMatt Macy }
727eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
728eac7052fSMatt Macy
729eac7052fSMatt Macy return (error);
730eac7052fSMatt Macy }
731eac7052fSMatt Macy
732eac7052fSMatt Macy /*
733eac7052fSMatt Macy * Add a new MMP historical record.
734eac7052fSMatt Macy * error == 0 : a write was issued.
735eac7052fSMatt Macy * error != 0 : a write was not issued because no leaves were found.
736eac7052fSMatt Macy */
737eac7052fSMatt Macy void
spa_mmp_history_add(spa_t * spa,uint64_t txg,uint64_t timestamp,uint64_t mmp_delay,vdev_t * vd,int label,uint64_t mmp_node_id,int error)738eac7052fSMatt Macy spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
739eac7052fSMatt Macy uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
740eac7052fSMatt Macy int error)
741eac7052fSMatt Macy {
742eac7052fSMatt Macy spa_history_list_t *shl = &spa->spa_stats.mmp_history;
743eac7052fSMatt Macy spa_mmp_history_t *smh;
744eac7052fSMatt Macy
745eac7052fSMatt Macy if (zfs_multihost_history == 0 && shl->size == 0)
746eac7052fSMatt Macy return;
747eac7052fSMatt Macy
748eac7052fSMatt Macy smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
749eac7052fSMatt Macy smh->txg = txg;
750eac7052fSMatt Macy smh->timestamp = timestamp;
751eac7052fSMatt Macy smh->mmp_delay = mmp_delay;
752eac7052fSMatt Macy if (vd) {
753eac7052fSMatt Macy smh->vdev_guid = vd->vdev_guid;
754eac7052fSMatt Macy if (vd->vdev_path)
755eac7052fSMatt Macy smh->vdev_path = kmem_strdup(vd->vdev_path);
756eac7052fSMatt Macy }
757eac7052fSMatt Macy smh->vdev_label = label;
758eac7052fSMatt Macy smh->mmp_node_id = mmp_node_id;
759eac7052fSMatt Macy
760eac7052fSMatt Macy if (error) {
761eac7052fSMatt Macy smh->io_error = error;
762eac7052fSMatt Macy smh->error_start = gethrtime();
763eac7052fSMatt Macy smh->vdev_guid = 1;
764eac7052fSMatt Macy }
765eac7052fSMatt Macy
766eac7052fSMatt Macy mutex_enter(&shl->procfs_list.pl_lock);
767eac7052fSMatt Macy procfs_list_add(&shl->procfs_list, smh);
768eac7052fSMatt Macy shl->size++;
769eac7052fSMatt Macy spa_mmp_history_truncate(shl, zfs_multihost_history);
770eac7052fSMatt Macy mutex_exit(&shl->procfs_list.pl_lock);
771eac7052fSMatt Macy }
772eac7052fSMatt Macy
773eac7052fSMatt Macy static void *
spa_state_addr(kstat_t * ksp,loff_t n)774eac7052fSMatt Macy spa_state_addr(kstat_t *ksp, loff_t n)
775eac7052fSMatt Macy {
776eac7052fSMatt Macy if (n == 0)
777eac7052fSMatt Macy return (ksp->ks_private); /* return the spa_t */
778eac7052fSMatt Macy return (NULL);
779eac7052fSMatt Macy }
780eac7052fSMatt Macy
781eac7052fSMatt Macy static int
spa_state_data(char * buf,size_t size,void * data)782eac7052fSMatt Macy spa_state_data(char *buf, size_t size, void *data)
783eac7052fSMatt Macy {
784eac7052fSMatt Macy spa_t *spa = (spa_t *)data;
785eac7052fSMatt Macy (void) snprintf(buf, size, "%s\n", spa_state_to_name(spa));
786eac7052fSMatt Macy return (0);
787eac7052fSMatt Macy }
788eac7052fSMatt Macy
789eac7052fSMatt Macy /*
790eac7052fSMatt Macy * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state.
791eac7052fSMatt Macy *
792eac7052fSMatt Macy * This is a lock-less read of the pool's state (unlike using 'zpool', which
793eac7052fSMatt Macy * can potentially block for seconds). Because it doesn't block, it can useful
794eac7052fSMatt Macy * as a pool heartbeat value.
795eac7052fSMatt Macy */
796eac7052fSMatt Macy static void
spa_state_init(spa_t * spa)797eac7052fSMatt Macy spa_state_init(spa_t *spa)
798eac7052fSMatt Macy {
799eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.state;
800eac7052fSMatt Macy char *name;
801eac7052fSMatt Macy kstat_t *ksp;
802eac7052fSMatt Macy
803eac7052fSMatt Macy mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
804eac7052fSMatt Macy
805eac7052fSMatt Macy name = kmem_asprintf("zfs/%s", spa_name(spa));
806eac7052fSMatt Macy ksp = kstat_create(name, 0, "state", "misc",
807eac7052fSMatt Macy KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
808eac7052fSMatt Macy
809eac7052fSMatt Macy shk->kstat = ksp;
810eac7052fSMatt Macy if (ksp) {
811eac7052fSMatt Macy ksp->ks_lock = &shk->lock;
812eac7052fSMatt Macy ksp->ks_data = NULL;
813eac7052fSMatt Macy ksp->ks_private = spa;
814eac7052fSMatt Macy ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
815eac7052fSMatt Macy kstat_set_raw_ops(ksp, NULL, spa_state_data, spa_state_addr);
816eac7052fSMatt Macy kstat_install(ksp);
817eac7052fSMatt Macy }
818eac7052fSMatt Macy
819eac7052fSMatt Macy kmem_strfree(name);
820eac7052fSMatt Macy }
821eac7052fSMatt Macy
822e3aa18adSMartin Matuska static int
spa_guid_data(char * buf,size_t size,void * data)823e3aa18adSMartin Matuska spa_guid_data(char *buf, size_t size, void *data)
824e3aa18adSMartin Matuska {
825e3aa18adSMartin Matuska spa_t *spa = (spa_t *)data;
826e3aa18adSMartin Matuska (void) snprintf(buf, size, "%llu\n", (u_longlong_t)spa_guid(spa));
827e3aa18adSMartin Matuska return (0);
828e3aa18adSMartin Matuska }
829e3aa18adSMartin Matuska
830e3aa18adSMartin Matuska static void
spa_guid_init(spa_t * spa)831e3aa18adSMartin Matuska spa_guid_init(spa_t *spa)
832e3aa18adSMartin Matuska {
833e3aa18adSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.guid;
834e3aa18adSMartin Matuska char *name;
835e3aa18adSMartin Matuska kstat_t *ksp;
836e3aa18adSMartin Matuska
837e3aa18adSMartin Matuska mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
838e3aa18adSMartin Matuska
839e3aa18adSMartin Matuska name = kmem_asprintf("zfs/%s", spa_name(spa));
840e3aa18adSMartin Matuska
841e3aa18adSMartin Matuska ksp = kstat_create(name, 0, "guid", "misc",
842e3aa18adSMartin Matuska KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
843e3aa18adSMartin Matuska
844e3aa18adSMartin Matuska shk->kstat = ksp;
845e3aa18adSMartin Matuska if (ksp) {
846e3aa18adSMartin Matuska ksp->ks_lock = &shk->lock;
847e3aa18adSMartin Matuska ksp->ks_data = NULL;
848e3aa18adSMartin Matuska ksp->ks_private = spa;
849e3aa18adSMartin Matuska ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
850e3aa18adSMartin Matuska kstat_set_raw_ops(ksp, NULL, spa_guid_data, spa_state_addr);
851e3aa18adSMartin Matuska kstat_install(ksp);
852e3aa18adSMartin Matuska }
853e3aa18adSMartin Matuska
854e3aa18adSMartin Matuska kmem_strfree(name);
855e3aa18adSMartin Matuska }
856e3aa18adSMartin Matuska
857eac7052fSMatt Macy static void
spa_health_destroy(spa_t * spa)858eac7052fSMatt Macy spa_health_destroy(spa_t *spa)
859eac7052fSMatt Macy {
860eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.state;
861eac7052fSMatt Macy kstat_t *ksp = shk->kstat;
862eac7052fSMatt Macy if (ksp)
863eac7052fSMatt Macy kstat_delete(ksp);
864eac7052fSMatt Macy
865eac7052fSMatt Macy mutex_destroy(&shk->lock);
866eac7052fSMatt Macy }
867eac7052fSMatt Macy
868e3aa18adSMartin Matuska static void
spa_guid_destroy(spa_t * spa)869e3aa18adSMartin Matuska spa_guid_destroy(spa_t *spa)
870e3aa18adSMartin Matuska {
871e3aa18adSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.guid;
872e3aa18adSMartin Matuska kstat_t *ksp = shk->kstat;
873e3aa18adSMartin Matuska if (ksp)
874e3aa18adSMartin Matuska kstat_delete(ksp);
875e3aa18adSMartin Matuska
876e3aa18adSMartin Matuska mutex_destroy(&shk->lock);
877e3aa18adSMartin Matuska }
878e3aa18adSMartin Matuska
879e92ffd9bSMartin Matuska static const spa_iostats_t spa_iostats_template = {
880eac7052fSMatt Macy { "trim_extents_written", KSTAT_DATA_UINT64 },
881eac7052fSMatt Macy { "trim_bytes_written", KSTAT_DATA_UINT64 },
882eac7052fSMatt Macy { "trim_extents_skipped", KSTAT_DATA_UINT64 },
883eac7052fSMatt Macy { "trim_bytes_skipped", KSTAT_DATA_UINT64 },
884eac7052fSMatt Macy { "trim_extents_failed", KSTAT_DATA_UINT64 },
885eac7052fSMatt Macy { "trim_bytes_failed", KSTAT_DATA_UINT64 },
886eac7052fSMatt Macy { "autotrim_extents_written", KSTAT_DATA_UINT64 },
887eac7052fSMatt Macy { "autotrim_bytes_written", KSTAT_DATA_UINT64 },
888eac7052fSMatt Macy { "autotrim_extents_skipped", KSTAT_DATA_UINT64 },
889eac7052fSMatt Macy { "autotrim_bytes_skipped", KSTAT_DATA_UINT64 },
890eac7052fSMatt Macy { "autotrim_extents_failed", KSTAT_DATA_UINT64 },
891eac7052fSMatt Macy { "autotrim_bytes_failed", KSTAT_DATA_UINT64 },
892eac7052fSMatt Macy { "simple_trim_extents_written", KSTAT_DATA_UINT64 },
893eac7052fSMatt Macy { "simple_trim_bytes_written", KSTAT_DATA_UINT64 },
894eac7052fSMatt Macy { "simple_trim_extents_skipped", KSTAT_DATA_UINT64 },
895eac7052fSMatt Macy { "simple_trim_bytes_skipped", KSTAT_DATA_UINT64 },
896eac7052fSMatt Macy { "simple_trim_extents_failed", KSTAT_DATA_UINT64 },
897eac7052fSMatt Macy { "simple_trim_bytes_failed", KSTAT_DATA_UINT64 },
898*7a7741afSMartin Matuska { "arc_read_count", KSTAT_DATA_UINT64 },
899*7a7741afSMartin Matuska { "arc_read_bytes", KSTAT_DATA_UINT64 },
900*7a7741afSMartin Matuska { "arc_write_count", KSTAT_DATA_UINT64 },
901*7a7741afSMartin Matuska { "arc_write_bytes", KSTAT_DATA_UINT64 },
902*7a7741afSMartin Matuska { "direct_read_count", KSTAT_DATA_UINT64 },
903*7a7741afSMartin Matuska { "direct_read_bytes", KSTAT_DATA_UINT64 },
904*7a7741afSMartin Matuska { "direct_write_count", KSTAT_DATA_UINT64 },
905*7a7741afSMartin Matuska { "direct_write_bytes", KSTAT_DATA_UINT64 },
906eac7052fSMatt Macy };
907eac7052fSMatt Macy
/*
 * Atomically add 'val' to the 64-bit counter 'stat' of the spa_iostats_t
 * pointed to by a variable named 'iostats' in the calling scope.
 *
 * The expansion deliberately has no trailing semicolon: the caller supplies
 * it, so the macro behaves as a single statement (e.g. in an unbraced
 * if/else).
 */
#define	SPA_IOSTATS_ADD(stat, val) \
	atomic_add_64(&iostats->stat.value.ui64, (val))
910eac7052fSMatt Macy
911eac7052fSMatt Macy void
spa_iostats_trim_add(spa_t * spa,trim_type_t type,uint64_t extents_written,uint64_t bytes_written,uint64_t extents_skipped,uint64_t bytes_skipped,uint64_t extents_failed,uint64_t bytes_failed)912eac7052fSMatt Macy spa_iostats_trim_add(spa_t *spa, trim_type_t type,
913eac7052fSMatt Macy uint64_t extents_written, uint64_t bytes_written,
914eac7052fSMatt Macy uint64_t extents_skipped, uint64_t bytes_skipped,
915eac7052fSMatt Macy uint64_t extents_failed, uint64_t bytes_failed)
916eac7052fSMatt Macy {
917eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.iostats;
918eac7052fSMatt Macy kstat_t *ksp = shk->kstat;
919eac7052fSMatt Macy spa_iostats_t *iostats;
920eac7052fSMatt Macy
921eac7052fSMatt Macy if (ksp == NULL)
922eac7052fSMatt Macy return;
923eac7052fSMatt Macy
924eac7052fSMatt Macy iostats = ksp->ks_data;
925eac7052fSMatt Macy if (type == TRIM_TYPE_MANUAL) {
926eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_extents_written, extents_written);
927eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_bytes_written, bytes_written);
928eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_extents_skipped, extents_skipped);
929eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_bytes_skipped, bytes_skipped);
930eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_extents_failed, extents_failed);
931eac7052fSMatt Macy SPA_IOSTATS_ADD(trim_bytes_failed, bytes_failed);
932eac7052fSMatt Macy } else if (type == TRIM_TYPE_AUTO) {
933eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_extents_written, extents_written);
934eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_bytes_written, bytes_written);
935eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_extents_skipped, extents_skipped);
936eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_bytes_skipped, bytes_skipped);
937eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_extents_failed, extents_failed);
938eac7052fSMatt Macy SPA_IOSTATS_ADD(autotrim_bytes_failed, bytes_failed);
939eac7052fSMatt Macy } else {
940eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_extents_written, extents_written);
941eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_bytes_written, bytes_written);
942eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_extents_skipped, extents_skipped);
943eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_bytes_skipped, bytes_skipped);
944eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_extents_failed, extents_failed);
945eac7052fSMatt Macy SPA_IOSTATS_ADD(simple_trim_bytes_failed, bytes_failed);
946eac7052fSMatt Macy }
947eac7052fSMatt Macy }
948eac7052fSMatt Macy
949*7a7741afSMartin Matuska void
spa_iostats_read_add(spa_t * spa,uint64_t size,uint64_t iops,uint32_t flags)950*7a7741afSMartin Matuska spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
951*7a7741afSMartin Matuska {
952*7a7741afSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.iostats;
953*7a7741afSMartin Matuska kstat_t *ksp = shk->kstat;
954*7a7741afSMartin Matuska
955*7a7741afSMartin Matuska if (ksp == NULL)
956*7a7741afSMartin Matuska return;
957*7a7741afSMartin Matuska
958*7a7741afSMartin Matuska spa_iostats_t *iostats = ksp->ks_data;
959*7a7741afSMartin Matuska if (flags & DMU_DIRECTIO) {
960*7a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_read_count, iops);
961*7a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_read_bytes, size);
962*7a7741afSMartin Matuska } else {
963*7a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_read_count, iops);
964*7a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_read_bytes, size);
965*7a7741afSMartin Matuska }
966*7a7741afSMartin Matuska }
967*7a7741afSMartin Matuska
968*7a7741afSMartin Matuska void
spa_iostats_write_add(spa_t * spa,uint64_t size,uint64_t iops,uint32_t flags)969*7a7741afSMartin Matuska spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
970*7a7741afSMartin Matuska {
971*7a7741afSMartin Matuska spa_history_kstat_t *shk = &spa->spa_stats.iostats;
972*7a7741afSMartin Matuska kstat_t *ksp = shk->kstat;
973*7a7741afSMartin Matuska
974*7a7741afSMartin Matuska if (ksp == NULL)
975*7a7741afSMartin Matuska return;
976*7a7741afSMartin Matuska
977*7a7741afSMartin Matuska spa_iostats_t *iostats = ksp->ks_data;
978*7a7741afSMartin Matuska if (flags & DMU_DIRECTIO) {
979*7a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_write_count, iops);
980*7a7741afSMartin Matuska SPA_IOSTATS_ADD(direct_write_bytes, size);
981*7a7741afSMartin Matuska } else {
982*7a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_write_count, iops);
983*7a7741afSMartin Matuska SPA_IOSTATS_ADD(arc_write_bytes, size);
984*7a7741afSMartin Matuska }
985*7a7741afSMartin Matuska }
986*7a7741afSMartin Matuska
987eac7052fSMatt Macy static int
spa_iostats_update(kstat_t * ksp,int rw)988eac7052fSMatt Macy spa_iostats_update(kstat_t *ksp, int rw)
989eac7052fSMatt Macy {
990eac7052fSMatt Macy if (rw == KSTAT_WRITE) {
991eac7052fSMatt Macy memcpy(ksp->ks_data, &spa_iostats_template,
992eac7052fSMatt Macy sizeof (spa_iostats_t));
993eac7052fSMatt Macy }
994eac7052fSMatt Macy
995eac7052fSMatt Macy return (0);
996eac7052fSMatt Macy }
997eac7052fSMatt Macy
998eac7052fSMatt Macy static void
spa_iostats_init(spa_t * spa)999eac7052fSMatt Macy spa_iostats_init(spa_t *spa)
1000eac7052fSMatt Macy {
1001eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.iostats;
1002eac7052fSMatt Macy
1003eac7052fSMatt Macy mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL);
1004eac7052fSMatt Macy
1005eac7052fSMatt Macy char *name = kmem_asprintf("zfs/%s", spa_name(spa));
1006eac7052fSMatt Macy kstat_t *ksp = kstat_create(name, 0, "iostats", "misc",
1007eac7052fSMatt Macy KSTAT_TYPE_NAMED, sizeof (spa_iostats_t) / sizeof (kstat_named_t),
1008eac7052fSMatt Macy KSTAT_FLAG_VIRTUAL);
1009eac7052fSMatt Macy
1010eac7052fSMatt Macy shk->kstat = ksp;
1011eac7052fSMatt Macy if (ksp) {
1012eac7052fSMatt Macy int size = sizeof (spa_iostats_t);
1013eac7052fSMatt Macy ksp->ks_lock = &shk->lock;
1014eac7052fSMatt Macy ksp->ks_private = spa;
1015eac7052fSMatt Macy ksp->ks_update = spa_iostats_update;
1016eac7052fSMatt Macy ksp->ks_data = kmem_alloc(size, KM_SLEEP);
1017eac7052fSMatt Macy memcpy(ksp->ks_data, &spa_iostats_template, size);
1018eac7052fSMatt Macy kstat_install(ksp);
1019eac7052fSMatt Macy }
1020eac7052fSMatt Macy
1021eac7052fSMatt Macy kmem_strfree(name);
1022eac7052fSMatt Macy }
1023eac7052fSMatt Macy
1024eac7052fSMatt Macy static void
spa_iostats_destroy(spa_t * spa)1025eac7052fSMatt Macy spa_iostats_destroy(spa_t *spa)
1026eac7052fSMatt Macy {
1027eac7052fSMatt Macy spa_history_kstat_t *shk = &spa->spa_stats.iostats;
1028eac7052fSMatt Macy kstat_t *ksp = shk->kstat;
1029eac7052fSMatt Macy if (ksp) {
1030eac7052fSMatt Macy kmem_free(ksp->ks_data, sizeof (spa_iostats_t));
1031eac7052fSMatt Macy kstat_delete(ksp);
1032eac7052fSMatt Macy }
1033eac7052fSMatt Macy
1034eac7052fSMatt Macy mutex_destroy(&shk->lock);
1035eac7052fSMatt Macy }
1036eac7052fSMatt Macy
1037eac7052fSMatt Macy void
spa_stats_init(spa_t * spa)1038eac7052fSMatt Macy spa_stats_init(spa_t *spa)
1039eac7052fSMatt Macy {
1040eac7052fSMatt Macy spa_read_history_init(spa);
1041eac7052fSMatt Macy spa_txg_history_init(spa);
1042eac7052fSMatt Macy spa_tx_assign_init(spa);
1043eac7052fSMatt Macy spa_mmp_history_init(spa);
1044eac7052fSMatt Macy spa_state_init(spa);
1045e3aa18adSMartin Matuska spa_guid_init(spa);
1046eac7052fSMatt Macy spa_iostats_init(spa);
1047eac7052fSMatt Macy }
1048eac7052fSMatt Macy
1049eac7052fSMatt Macy void
spa_stats_destroy(spa_t * spa)1050eac7052fSMatt Macy spa_stats_destroy(spa_t *spa)
1051eac7052fSMatt Macy {
1052eac7052fSMatt Macy spa_iostats_destroy(spa);
1053eac7052fSMatt Macy spa_health_destroy(spa);
1054eac7052fSMatt Macy spa_tx_assign_destroy(spa);
1055eac7052fSMatt Macy spa_txg_history_destroy(spa);
1056eac7052fSMatt Macy spa_read_history_destroy(spa);
1057eac7052fSMatt Macy spa_mmp_history_destroy(spa);
1058e3aa18adSMartin Matuska spa_guid_destroy(spa);
1059eac7052fSMatt Macy }
1060eac7052fSMatt Macy
1061be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs, zfs_, read_history, UINT, ZMOD_RW,
1062eac7052fSMatt Macy "Historical statistics for the last N reads");
1063eac7052fSMatt Macy
1064eac7052fSMatt Macy ZFS_MODULE_PARAM(zfs, zfs_, read_history_hits, INT, ZMOD_RW,
1065eac7052fSMatt Macy "Include cache hits in read history");
1066eac7052fSMatt Macy
1067be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs_txg, zfs_txg_, history, UINT, ZMOD_RW,
1068eac7052fSMatt Macy "Historical statistics for the last N txgs");
1069eac7052fSMatt Macy
1070be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs_multihost, zfs_multihost_, history, UINT, ZMOD_RW,
1071eac7052fSMatt Macy "Historical statistics for last N multihost writes");
1072