// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/math64.h>
#include <linux/slab.h>
#include <linux/seq_file.h>

#include "subvolume_metrics.h"
#include "mds_client.h"
#include "super.h"

/**
 * struct ceph_subvol_metric_rb_entry - Per-subvolume I/O metrics node
 * @node: Red-black tree linkage for tracker->tree
 * @subvolume_id: Subvolume identifier (key for rb-tree lookup)
 * @read_ops: Accumulated read operation count since last snapshot
 * @write_ops: Accumulated write operation count since last snapshot
 * @read_bytes: Accumulated bytes read since last snapshot
 * @write_bytes: Accumulated bytes written since last snapshot
 * @read_latency_us: Sum of read latencies in microseconds since last snapshot
 * @write_latency_us: Sum of write latencies in microseconds since last snapshot
 */
struct ceph_subvol_metric_rb_entry {
	struct rb_node node;
	u64 subvolume_id;
	u64 read_ops;
	u64 write_ops;
	u64 read_bytes;
	u64 write_bytes;
	u64 read_latency_us;
	u64 write_latency_us;
};

static struct kmem_cache *ceph_subvol_metric_entry_cachep;

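/**
 * ceph_subvolume_metrics_init - Prepare a tracker for use
 * @tracker: tracker embedded in the MDS client
 *
 * Initializes the lock, the empty rb-tree and every counter; tracking
 * starts disabled until ceph_subvolume_metrics_enable() is called.
 */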
void ceph_subvolume_metrics_init(struct ceph_subvolume_metrics_tracker *tracker)
{
	spin_lock_init(&tracker->lock);
	tracker->tree = RB_ROOT_CACHED;
	tracker->nr_entries = 0;
	tracker->enabled = false;
	atomic64_set(&tracker->snapshot_attempts, 0);
	atomic64_set(&tracker->snapshot_empty, 0);
	atomic64_set(&tracker->snapshot_failures, 0);
	atomic64_set(&tracker->record_calls, 0);
	atomic64_set(&tracker->record_disabled, 0);
	atomic64_set(&tracker->record_no_subvol, 0);
	atomic64_set(&tracker->total_read_ops, 0);
	atomic64_set(&tracker->total_read_bytes, 0);
	atomic64_set(&tracker->total_write_ops, 0);
	atomic64_set(&tracker->total_write_bytes, 0);
}

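/*
 * Find the entry for @subvol_id, or return NULL if none exists.
 * Caller must hold tracker->lock.
 */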
static struct ceph_subvol_metric_rb_entry *
__lookup_entry(struct ceph_subvolume_metrics_tracker *tracker, u64 subvol_id)
{
	struct rb_node *node;

	node = tracker->tree.rb_root.rb_node;
	while (node) {
		struct ceph_subvol_metric_rb_entry *entry =
			rb_entry(node, struct ceph_subvol_metric_rb_entry, node);

		if (subvol_id < entry->subvolume_id)
			node = node->rb_left;
		else if (subvol_id > entry->subvolume_id)
			node = node->rb_right;
		else
			return entry;
	}

	return NULL;
}

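/*
 * Link @entry into the tree. If an entry with the same subvolume_id is
 * already present, return the existing entry and leave @entry unlinked.
 * Caller must hold tracker->lock.
 */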
static struct ceph_subvol_metric_rb_entry *
__insert_entry(struct ceph_subvolume_metrics_tracker *tracker,
	       struct ceph_subvol_metric_rb_entry *entry)
{
	struct rb_node **link = &tracker->tree.rb_root.rb_node;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	while (*link) {
		struct ceph_subvol_metric_rb_entry *cur =
			rb_entry(*link, struct ceph_subvol_metric_rb_entry, node);

		parent = *link;
		if (entry->subvolume_id < cur->subvolume_id) {
			link = &(*link)->rb_left;
		} else if (entry->subvolume_id > cur->subvolume_id) {
			link = &(*link)->rb_right;
			leftmost = false;
		} else {
			return cur;
		}
	}

	rb_link_node(&entry->node, parent, link);
	rb_insert_color_cached(&entry->node, &tracker->tree, leftmost);
	tracker->nr_entries++;
	return entry;
}

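/* Free every entry in the tree. Caller must hold tracker->lock. */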
static void ceph_subvolume_metrics_clear_locked(
		struct ceph_subvolume_metrics_tracker *tracker)
{
	struct rb_node *node = rb_first_cached(&tracker->tree);

	while (node) {
		struct ceph_subvol_metric_rb_entry *entry =
			rb_entry(node, struct ceph_subvol_metric_rb_entry, node);
		struct rb_node *next = rb_next(node);

		rb_erase_cached(&entry->node, &tracker->tree);
		tracker->nr_entries--;
		kmem_cache_free(ceph_subvol_metric_entry_cachep, entry);
		node = next;
	}

	tracker->tree = RB_ROOT_CACHED;
}

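/*
 * Tear down the tracker: free all accumulated entries and disable
 * further recording.
 */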
void ceph_subvolume_metrics_destroy(struct ceph_subvolume_metrics_tracker *tracker)
{
	spin_lock(&tracker->lock);
	ceph_subvolume_metrics_clear_locked(tracker);
	tracker->enabled = false;
	spin_unlock(&tracker->lock);
}

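/**
 * ceph_subvolume_metrics_enable - Enable or disable per-subvolume tracking
 * @tracker: tracker to update
 * @enable: true to start collecting, false to stop
 *
 * Disabling also drops all accumulated entries, so a later re-enable
 * starts from a clean slate.
 */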
void ceph_subvolume_metrics_enable(struct ceph_subvolume_metrics_tracker *tracker,
				   bool enable)
{
	spin_lock(&tracker->lock);
	tracker->enabled = enable;
	if (!enable)
		ceph_subvolume_metrics_clear_locked(tracker);
	spin_unlock(&tracker->lock);
}

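/**
 * ceph_subvolume_metrics_record - Account one I/O against a subvolume
 * @tracker: tracker to update
 * @subvol_id: subvolume the I/O belongs to
 * @is_write: true for writes, false for reads
 * @size: number of bytes transferred
 * @latency_us: operation latency in microseconds
 *
 * Allocates the per-subvolume entry on first use. The sample is silently
 * dropped if tracking is disabled, the allocation fails, or the size or
 * latency is zero.
 */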
void ceph_subvolume_metrics_record(struct ceph_subvolume_metrics_tracker *tracker,
				   u64 subvol_id, bool is_write,
				   size_t size, u64 latency_us)
{
	struct ceph_subvol_metric_rb_entry *entry, *new_entry = NULL;
	bool retry = false;

	/* CEPH_SUBVOLUME_ID_NONE (0) means unknown/unset subvolume */
	if (!READ_ONCE(tracker->enabled) ||
	    subvol_id == CEPH_SUBVOLUME_ID_NONE || !size || !latency_us)
		return;

	/*
	 * Retry loop so the (possibly sleeping) allocation happens outside
	 * the spinlock:
	 * 1. First pass: look up under the lock; on a miss, drop the lock,
	 *    allocate, and retry.
	 * 2. Second pass: look up again (another task may have inserted the
	 *    entry meanwhile) and insert ours if the key is still missing.
	 * 3. On a lost race, free our allocation and retry.
	 * All successful paths exit via return, so the retry flag never
	 * needs to be reset.
	 */
	do {
		spin_lock(&tracker->lock);
		/* Recheck under the lock; tracking may have been disabled. */
		if (!tracker->enabled) {
			spin_unlock(&tracker->lock);
			if (new_entry)
				kmem_cache_free(ceph_subvol_metric_entry_cachep, new_entry);
			return;
		}

		entry = __lookup_entry(tracker, subvol_id);
		if (!entry) {
			if (!new_entry) {
				spin_unlock(&tracker->lock);
				new_entry = kmem_cache_zalloc(ceph_subvol_metric_entry_cachep,
							      GFP_NOFS);
				if (!new_entry)
					return;
				new_entry->subvolume_id = subvol_id;
				retry = true;
				continue;
			}
			entry = __insert_entry(tracker, new_entry);
			if (entry != new_entry) {
				/* raced with another insert */
				spin_unlock(&tracker->lock);
				kmem_cache_free(ceph_subvol_metric_entry_cachep, new_entry);
				new_entry = NULL;
				retry = true;
				continue;
			}
			new_entry = NULL;
		}

		if (is_write) {
			entry->write_ops++;
			entry->write_bytes += size;
			entry->write_latency_us += latency_us;
			atomic64_inc(&tracker->total_write_ops);
			atomic64_add(size, &tracker->total_write_bytes);
		} else {
			entry->read_ops++;
			entry->read_bytes += size;
			entry->read_latency_us += latency_us;
			atomic64_inc(&tracker->total_read_ops);
			atomic64_add(size, &tracker->total_read_bytes);
		}
		spin_unlock(&tracker->lock);
		/* Another task inserted the entry first; drop the spare. */
		if (new_entry)
			kmem_cache_free(ceph_subvol_metric_entry_cachep, new_entry);
		return;
	} while (retry);
}

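/**
 * ceph_subvolume_metrics_snapshot - Copy out the accumulated metrics
 * @tracker: tracker to snapshot
 * @out: on success, points to a kcalloc'd array of per-subvolume totals
 * @nr: number of elements in *@out
 * @consume: if true, entries are removed from the tree once copied
 *
 * Returns 0 on success (including the empty case, with *@out == NULL and
 * *@nr == 0) or -ENOMEM if the snapshot array cannot be allocated. The
 * caller frees the array with ceph_subvolume_metrics_free_snapshot().
 */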
int ceph_subvolume_metrics_snapshot(struct ceph_subvolume_metrics_tracker *tracker,
				    struct ceph_subvol_metric_snapshot **out,
				    u32 *nr, bool consume)
{
	struct ceph_subvol_metric_snapshot *snap = NULL;
	struct rb_node *node;
	u32 count = 0, idx = 0;
	int ret = 0;

	*out = NULL;
	*nr = 0;

	if (!READ_ONCE(tracker->enabled))
		return 0;

	atomic64_inc(&tracker->snapshot_attempts);

	/*
	 * Count active entries first so the array can be allocated outside
	 * the lock. The tree may grow between the two passes, hence the
	 * idx bound check below.
	 */
	spin_lock(&tracker->lock);
	for (node = rb_first_cached(&tracker->tree); node; node = rb_next(node)) {
		struct ceph_subvol_metric_rb_entry *entry =
			rb_entry(node, struct ceph_subvol_metric_rb_entry, node);

		/* Include entries with ANY I/O activity (read OR write) */
		if (entry->read_ops || entry->write_ops)
			count++;
	}
	spin_unlock(&tracker->lock);

	if (!count) {
		atomic64_inc(&tracker->snapshot_empty);
		return 0;
	}

	snap = kcalloc(count, sizeof(*snap), GFP_NOFS);
	if (!snap) {
		atomic64_inc(&tracker->snapshot_failures);
		return -ENOMEM;
	}

	spin_lock(&tracker->lock);
	node = rb_first_cached(&tracker->tree);
	while (node) {
		struct ceph_subvol_metric_rb_entry *entry =
			rb_entry(node, struct ceph_subvol_metric_rb_entry, node);
		struct rb_node *next = rb_next(node);

		/* Prune entries with no I/O activity at all */
		if (!entry->read_ops && !entry->write_ops) {
			rb_erase_cached(&entry->node, &tracker->tree);
			tracker->nr_entries--;
			kmem_cache_free(ceph_subvol_metric_entry_cachep, entry);
			node = next;
			continue;
		}

		if (idx >= count) {
			pr_warn("ceph: subvol metrics snapshot race (idx=%u count=%u)\n",
				idx, count);
			break;
		}

		snap[idx].subvolume_id = entry->subvolume_id;
		snap[idx].read_ops = entry->read_ops;
		snap[idx].write_ops = entry->write_ops;
		snap[idx].read_bytes = entry->read_bytes;
		snap[idx].write_bytes = entry->write_bytes;
		snap[idx].read_latency_us = entry->read_latency_us;
		snap[idx].write_latency_us = entry->write_latency_us;
		idx++;

		if (consume) {
			/*
			 * Consumed: drop the entry entirely; it will be
			 * recreated on the subvolume's next I/O.
			 */
			rb_erase_cached(&entry->node, &tracker->tree);
			tracker->nr_entries--;
			kmem_cache_free(ceph_subvol_metric_entry_cachep, entry);
		}
		node = next;
	}
	spin_unlock(&tracker->lock);

	if (!idx) {
		kfree(snap);
		snap = NULL;
	} else {
		*nr = idx;
		*out = snap;
	}

	return ret;
}

void ceph_subvolume_metrics_free_snapshot(struct ceph_subvol_metric_snapshot *snapshot)
{
	kfree(snapshot);
}

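/*
 * Typical consumer pattern, as a sketch only: the send_to_mds() helper
 * named here is hypothetical and not part of this file; only the
 * snapshot/free pairing is prescribed by the API above.
 *
 *	struct ceph_subvol_metric_snapshot *snap;
 *	u32 nr;
 *
 *	if (!ceph_subvolume_metrics_snapshot(tracker, &snap, &nr, true) &&
 *	    nr) {
 *		send_to_mds(snap, nr);
 *		ceph_subvolume_metrics_free_snapshot(snap);
 *	}
 */
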
/*
 * Dump subvolume metrics to a seq_file for debugfs.
 *
 * Iterates the rb-tree directly under the spinlock to avoid allocating a
 * snapshot; seq_printf() only formats into the seq_file's preallocated
 * buffer, so nothing sleeps while the lock is held, though the hold time
 * grows with the number of entries.
 */
void ceph_subvolume_metrics_dump(struct ceph_subvolume_metrics_tracker *tracker,
				 struct seq_file *s)
{
	struct rb_node *node;
	bool found = false;

	spin_lock(&tracker->lock);
	if (!tracker->enabled) {
		spin_unlock(&tracker->lock);
		seq_puts(s, "subvolume metrics disabled\n");
		return;
	}

	for (node = rb_first_cached(&tracker->tree); node; node = rb_next(node)) {
		struct ceph_subvol_metric_rb_entry *entry =
			rb_entry(node, struct ceph_subvol_metric_rb_entry, node);
		u64 avg_rd_lat, avg_wr_lat;

		if (!entry->read_ops && !entry->write_ops)
			continue;

		/* Print the header lazily, only once real data is found. */
		if (!found) {
			seq_puts(s, "subvol_id       rd_ops    rd_bytes    rd_avg_lat_us  wr_ops    wr_bytes    wr_avg_lat_us\n");
			seq_puts(s, "------------------------------------------------------------------------------------------------\n");
			found = true;
		}

		avg_rd_lat = entry->read_ops ?
			div64_u64(entry->read_latency_us, entry->read_ops) : 0;
		avg_wr_lat = entry->write_ops ?
			div64_u64(entry->write_latency_us, entry->write_ops) : 0;

		seq_printf(s, "%-15llu%-10llu%-12llu%-16llu%-10llu%-12llu%-16llu\n",
			   entry->subvolume_id,
			   entry->read_ops,
			   entry->read_bytes,
			   avg_rd_lat,
			   entry->write_ops,
			   entry->write_bytes,
			   avg_wr_lat);
	}
	spin_unlock(&tracker->lock);

	if (!found)
		seq_puts(s, "(no subvolume metrics collected)\n");
}

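/**
 * ceph_subvolume_metrics_record_io - Hook for the read/write completion path
 * @mdsc: MDS client owning the tracker
 * @ci: inode the I/O was issued against
 * @is_write: true for writes, false for reads
 * @bytes: number of bytes transferred
 * @start: when the operation was issued
 * @end: when the operation completed
 *
 * Resolves the inode's subvolume and converts the ktime span to
 * microseconds before accounting the sample.
 */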
void ceph_subvolume_metrics_record_io(struct ceph_mds_client *mdsc,
				      struct ceph_inode_info *ci,
				      bool is_write, size_t bytes,
				      ktime_t start, ktime_t end)
{
	struct ceph_subvolume_metrics_tracker *tracker;
	u64 subvol_id;
	s64 delta_us;

	if (!mdsc || !ci || !bytes)
		return;

	tracker = &mdsc->subvol_metrics;
	atomic64_inc(&tracker->record_calls);

	if (!ceph_subvolume_metrics_enabled(tracker)) {
		atomic64_inc(&tracker->record_disabled);
		return;
	}

	subvol_id = READ_ONCE(ci->i_subvolume_id);
	if (subvol_id == CEPH_SUBVOLUME_ID_NONE) {
		atomic64_inc(&tracker->record_no_subvol);
		return;
	}

	/*
	 * Clamp sub-microsecond (or clock-skewed) samples to 1us so they
	 * are not dropped by the !latency_us check in the record path.
	 */
	delta_us = ktime_to_us(ktime_sub(end, start));
	if (delta_us <= 0)
		delta_us = 1;

	ceph_subvolume_metrics_record(tracker, subvol_id, is_write,
				      bytes, (u64)delta_us);
}

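/*
 * Module-lifetime slab cache for the rb-tree entries. SLAB_RECLAIM_ACCOUNT
 * marks the cache's pages as reclaimable for accounting purposes, which
 * fits here since entries are freed whenever tracking is disabled or a
 * consuming snapshot runs.
 */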
int __init ceph_subvolume_metrics_cache_init(void)
{
	ceph_subvol_metric_entry_cachep = KMEM_CACHE(ceph_subvol_metric_rb_entry,
						     SLAB_RECLAIM_ACCOUNT);
	if (!ceph_subvol_metric_entry_cachep)
		return -ENOMEM;
	return 0;
}

void ceph_subvolume_metrics_cache_destroy(void)
{
	kmem_cache_destroy(ceph_subvol_metric_entry_cachep);
}
417