xref: /freebsd/sys/contrib/openzfs/module/zfs/zfs_crrd.c (revision df58e8b1506f241670be86a560fb6e8432043aee)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2024 Klara Inc.
24  *
25  * This software was developed by
26  * Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
27  * Fred Weigel <fred.weigel@klarasystems.com>
28  * under sponsorship from Wasabi Technology, Inc. and Klara Inc.
29  */
30 /*
31  * This file implements a round-robin database that stores timestamps and txg
32  * numbers. Due to limited space, we use a round-robin approach, where
33  * the oldest records are overwritten when there is no longer enough room.
34  * This is a best-effort mechanism, and the database should be treated as
35  * an approximation. Consider this before consuming it.
36  *
37  * The database is linear, meaning we assume each new entry is newer than the
38  * ones already stored. Because of this, if time is manipulated, the database
39  * will only accept records that are newer than the existing ones.
40  * (For example, jumping 10 years into the future and then back can lead to
41  * situation when for 10 years we wont write anything to database)
42  *
43  * All times stored in the database use UTC, which makes it easy to convert to
44  * and from local time.
45  *
46  * Each database holds 256 records (as defined in the `RRD_MAX_ENTRIES` macro).
47  * This limit comes from the maximum size of a ZAP object, where we store the
48  * binary blob.
49  *
50  * We've split the database into three smaller ones.
51  * The `minute database` provides high resolution (default: every 10 minutes),
52  * but only covers approximately 1.5 days. This gives a detailed view of recent
53  * activity, useful, for example, when performing a scrub of the last hour.
54  * The `daily database` records one txg per day. With 256 entries, it retains
55  * roughly 8 months of data. This allows users to scrub or analyze txgs across
56  * a range of days.
57  * The `monthly database` stores one record per month, giving approximately
58  * 21 years of history.
59  * All these calculations assume the worst-case scenario: the pool is always
60  * online and actively written to.
61  *
62  * A potential source of confusion is that the database does not store data
63  * while the pool is offline, leading to potential gaps in timeline. Also,
64  * the database contains no records from before this feature was enabled.
65  * Both, upon reflection, are expected.
66  */
67 #include <sys/zfs_context.h>
68 
69 #include "zfs_crrd.h"
70 
71 rrd_data_t *
rrd_tail_entry(rrd_t * rrd)72 rrd_tail_entry(rrd_t *rrd)
73 {
74 	size_t n;
75 
76 	if (rrd_len(rrd) == 0)
77 		return (NULL);
78 
79 	if (rrd->rrd_tail == 0)
80 		n = RRD_MAX_ENTRIES - 1;
81 	else
82 		n = rrd->rrd_tail - 1;
83 
84 	return (&rrd->rrd_entries[n]);
85 }
86 
87 uint64_t
rrd_tail(rrd_t * rrd)88 rrd_tail(rrd_t *rrd)
89 {
90 	const rrd_data_t *tail;
91 
92 	tail = rrd_tail_entry(rrd);
93 
94 	return (tail == NULL ? 0 : tail->rrdd_time);
95 }
96 
97 /*
98  * Return length of data in the rrd.
99  * rrd_get works from 0..rrd_len()-1.
100  */
101 size_t
rrd_len(rrd_t * rrd)102 rrd_len(rrd_t *rrd)
103 {
104 
105 	return (rrd->rrd_length);
106 }
107 
108 const rrd_data_t *
rrd_entry(rrd_t * rrd,size_t i)109 rrd_entry(rrd_t *rrd, size_t i)
110 {
111 	size_t n;
112 
113 	if (i >= rrd_len(rrd)) {
114 		return (0);
115 	}
116 
117 	n = (rrd->rrd_head + i) % RRD_MAX_ENTRIES;
118 	return (&rrd->rrd_entries[n]);
119 }
120 
121 uint64_t
rrd_get(rrd_t * rrd,size_t i)122 rrd_get(rrd_t *rrd, size_t i)
123 {
124 	const rrd_data_t *data = rrd_entry(rrd, i);
125 
126 	return (data == NULL ? 0 : data->rrdd_txg);
127 }
128 
129 /* Add value to database. */
130 void
rrd_add(rrd_t * rrd,hrtime_t time,uint64_t txg)131 rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg)
132 {
133 	rrd_data_t *tail;
134 
135 	tail = rrd_tail_entry(rrd);
136 	if (tail != NULL && tail->rrdd_time == time) {
137 		if (tail->rrdd_txg < txg) {
138 			tail->rrdd_txg = txg;
139 		} else {
140 			return;
141 		}
142 	}
143 
144 	rrd->rrd_entries[rrd->rrd_tail].rrdd_time = time;
145 	rrd->rrd_entries[rrd->rrd_tail].rrdd_txg = txg;
146 
147 	rrd->rrd_tail = (rrd->rrd_tail + 1) % RRD_MAX_ENTRIES;
148 
149 	if (rrd->rrd_length < RRD_MAX_ENTRIES) {
150 		rrd->rrd_length++;
151 	} else {
152 		rrd->rrd_head = (rrd->rrd_head + 1) % RRD_MAX_ENTRIES;
153 	}
154 }
155 
156 void
dbrrd_add(dbrrd_t * db,hrtime_t time,uint64_t txg)157 dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg)
158 {
159 	hrtime_t daydiff, monthdiff, minutedif;
160 
161 	minutedif = time - rrd_tail(&db->dbr_minutes);
162 	daydiff = time - rrd_tail(&db->dbr_days);
163 	monthdiff = time - rrd_tail(&db->dbr_months);
164 
165 	if (monthdiff >= 0 && monthdiff >= SEC2NSEC(30 * 24 * 60 * 60))
166 		rrd_add(&db->dbr_months, time, txg);
167 	else if (daydiff >= 0 && daydiff >= SEC2NSEC(24 * 60 * 60))
168 		rrd_add(&db->dbr_days, time, txg);
169 	else if (minutedif >= 0)
170 		rrd_add(&db->dbr_minutes, time, txg);
171 }
172 
173 /*
174  * We could do a binary search here, but the routine isn't frequently
175  * called and the data is small so we stick to a simple loop.
176  */
177 static const rrd_data_t *
rrd_query(rrd_t * rrd,hrtime_t tv,dbrrd_rounding_t rounding)178 rrd_query(rrd_t *rrd, hrtime_t tv, dbrrd_rounding_t rounding)
179 {
180 	const rrd_data_t *data = NULL;
181 
182 	for (size_t i = 0; i < rrd_len(rrd); i++) {
183 		const rrd_data_t *cur = rrd_entry(rrd, i);
184 
185 		if (rounding == DBRRD_FLOOR) {
186 			if (tv < cur->rrdd_time) {
187 				break;
188 			}
189 			data = cur;
190 		} else {
191 			/* DBRRD_CEILING */
192 			if (tv <= cur->rrdd_time) {
193 				data = cur;
194 				break;
195 			}
196 		}
197 	}
198 
199 	return (data);
200 }
201 
202 static const rrd_data_t *
dbrrd_closest(hrtime_t tv,const rrd_data_t * r1,const rrd_data_t * r2)203 dbrrd_closest(hrtime_t tv, const rrd_data_t *r1, const rrd_data_t *r2)
204 {
205 
206 	if (r1 == NULL)
207 		return (r2);
208 	if (r2 == NULL)
209 		return (r1);
210 
211 	return (ABS(tv - r1->rrdd_time) < ABS(tv - r2->rrdd_time) ? r1 : r2);
212 }
213 
214 uint64_t
dbrrd_query(dbrrd_t * r,hrtime_t tv,dbrrd_rounding_t rounding)215 dbrrd_query(dbrrd_t *r, hrtime_t tv, dbrrd_rounding_t rounding)
216 {
217 	const rrd_data_t *data, *dm, *dd, *dy;
218 
219 	data = NULL;
220 	dm = rrd_query(&r->dbr_minutes, tv, rounding);
221 	dd = rrd_query(&r->dbr_days, tv, rounding);
222 	dy = rrd_query(&r->dbr_months, tv, rounding);
223 
224 	data = dbrrd_closest(tv, dbrrd_closest(tv, dd, dm), dy);
225 
226 	return (data == NULL ? 0 : data->rrdd_txg);
227 }
228