1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 2024 Klara Inc.
24 *
25 * This software was developed by
26 * Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
27 * Fred Weigel <fred.weigel@klarasystems.com>
28 * under sponsorship from Wasabi Technology, Inc. and Klara Inc.
29 */
30 /*
31 * This file implements a round-robin database that stores timestamps and txg
32 * numbers. Due to limited space, we use a round-robin approach, where
33 * the oldest records are overwritten when there is no longer enough room.
34 * This is a best-effort mechanism, and the database should be treated as
35 * an approximation. Consider this before consuming it.
36 *
37 * The database is linear, meaning we assume each new entry is newer than the
38 * ones already stored. Because of this, if time is manipulated, the database
39 * will only accept records that are newer than the existing ones.
40 * (For example, jumping 10 years into the future and then back can lead to
41 * situation when for 10 years we wont write anything to database)
42 *
43 * All times stored in the database use UTC, which makes it easy to convert to
44 * and from local time.
45 *
46 * Each database holds 256 records (as defined in the `RRD_MAX_ENTRIES` macro).
47 * This limit comes from the maximum size of a ZAP object, where we store the
48 * binary blob.
49 *
50 * We've split the database into three smaller ones.
51 * The `minute database` provides high resolution (default: every 10 minutes),
52 * but only covers approximately 1.5 days. This gives a detailed view of recent
53 * activity, useful, for example, when performing a scrub of the last hour.
54 * The `daily database` records one txg per day. With 256 entries, it retains
55 * roughly 8 months of data. This allows users to scrub or analyze txgs across
56 * a range of days.
57 * The `monthly database` stores one record per month, giving approximately
58 * 21 years of history.
59 * All these calculations assume the worst-case scenario: the pool is always
60 * online and actively written to.
61 *
62 * A potential source of confusion is that the database does not store data
63 * while the pool is offline, leading to potential gaps in timeline. Also,
64 * the database contains no records from before this feature was enabled.
65 * Both, upon reflection, are expected.
66 */
67 #include <sys/zfs_context.h>
68
69 #include "zfs_crrd.h"
70
71 rrd_data_t *
rrd_tail_entry(rrd_t * rrd)72 rrd_tail_entry(rrd_t *rrd)
73 {
74 size_t n;
75
76 if (rrd_len(rrd) == 0)
77 return (NULL);
78
79 if (rrd->rrd_tail == 0)
80 n = RRD_MAX_ENTRIES - 1;
81 else
82 n = rrd->rrd_tail - 1;
83
84 return (&rrd->rrd_entries[n]);
85 }
86
87 uint64_t
rrd_tail(rrd_t * rrd)88 rrd_tail(rrd_t *rrd)
89 {
90 const rrd_data_t *tail;
91
92 tail = rrd_tail_entry(rrd);
93
94 return (tail == NULL ? 0 : tail->rrdd_time);
95 }
96
97 /*
98 * Return length of data in the rrd.
99 * rrd_get works from 0..rrd_len()-1.
100 */
101 size_t
rrd_len(rrd_t * rrd)102 rrd_len(rrd_t *rrd)
103 {
104
105 return (rrd->rrd_length);
106 }
107
108 const rrd_data_t *
rrd_entry(rrd_t * rrd,size_t i)109 rrd_entry(rrd_t *rrd, size_t i)
110 {
111 size_t n;
112
113 if (i >= rrd_len(rrd)) {
114 return (0);
115 }
116
117 n = (rrd->rrd_head + i) % RRD_MAX_ENTRIES;
118 return (&rrd->rrd_entries[n]);
119 }
120
121 uint64_t
rrd_get(rrd_t * rrd,size_t i)122 rrd_get(rrd_t *rrd, size_t i)
123 {
124 const rrd_data_t *data = rrd_entry(rrd, i);
125
126 return (data == NULL ? 0 : data->rrdd_txg);
127 }
128
129 /* Add value to database. */
130 void
rrd_add(rrd_t * rrd,hrtime_t time,uint64_t txg)131 rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg)
132 {
133 rrd_data_t *tail;
134
135 tail = rrd_tail_entry(rrd);
136 if (tail != NULL && tail->rrdd_time == time) {
137 if (tail->rrdd_txg < txg) {
138 tail->rrdd_txg = txg;
139 } else {
140 return;
141 }
142 }
143
144 rrd->rrd_entries[rrd->rrd_tail].rrdd_time = time;
145 rrd->rrd_entries[rrd->rrd_tail].rrdd_txg = txg;
146
147 rrd->rrd_tail = (rrd->rrd_tail + 1) % RRD_MAX_ENTRIES;
148
149 if (rrd->rrd_length < RRD_MAX_ENTRIES) {
150 rrd->rrd_length++;
151 } else {
152 rrd->rrd_head = (rrd->rrd_head + 1) % RRD_MAX_ENTRIES;
153 }
154 }
155
156 void
dbrrd_add(dbrrd_t * db,hrtime_t time,uint64_t txg)157 dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg)
158 {
159 hrtime_t daydiff, monthdiff, minutedif;
160
161 minutedif = time - rrd_tail(&db->dbr_minutes);
162 daydiff = time - rrd_tail(&db->dbr_days);
163 monthdiff = time - rrd_tail(&db->dbr_months);
164
165 if (monthdiff >= 0 && monthdiff >= SEC2NSEC(30 * 24 * 60 * 60))
166 rrd_add(&db->dbr_months, time, txg);
167 else if (daydiff >= 0 && daydiff >= SEC2NSEC(24 * 60 * 60))
168 rrd_add(&db->dbr_days, time, txg);
169 else if (minutedif >= 0)
170 rrd_add(&db->dbr_minutes, time, txg);
171 }
172
173 /*
174 * We could do a binary search here, but the routine isn't frequently
175 * called and the data is small so we stick to a simple loop.
176 */
177 static const rrd_data_t *
rrd_query(rrd_t * rrd,hrtime_t tv,dbrrd_rounding_t rounding)178 rrd_query(rrd_t *rrd, hrtime_t tv, dbrrd_rounding_t rounding)
179 {
180 const rrd_data_t *data = NULL;
181
182 for (size_t i = 0; i < rrd_len(rrd); i++) {
183 const rrd_data_t *cur = rrd_entry(rrd, i);
184
185 if (rounding == DBRRD_FLOOR) {
186 if (tv < cur->rrdd_time) {
187 break;
188 }
189 data = cur;
190 } else {
191 /* DBRRD_CEILING */
192 if (tv <= cur->rrdd_time) {
193 data = cur;
194 break;
195 }
196 }
197 }
198
199 return (data);
200 }
201
202 static const rrd_data_t *
dbrrd_closest(hrtime_t tv,const rrd_data_t * r1,const rrd_data_t * r2)203 dbrrd_closest(hrtime_t tv, const rrd_data_t *r1, const rrd_data_t *r2)
204 {
205
206 if (r1 == NULL)
207 return (r2);
208 if (r2 == NULL)
209 return (r1);
210
211 return (ABS(tv - r1->rrdd_time) < ABS(tv - r2->rrdd_time) ? r1 : r2);
212 }
213
214 uint64_t
dbrrd_query(dbrrd_t * r,hrtime_t tv,dbrrd_rounding_t rounding)215 dbrrd_query(dbrrd_t *r, hrtime_t tv, dbrrd_rounding_t rounding)
216 {
217 const rrd_data_t *data, *dm, *dd, *dy;
218
219 data = NULL;
220 dm = rrd_query(&r->dbr_minutes, tv, rounding);
221 dd = rrd_query(&r->dbr_days, tv, rounding);
222 dy = rrd_query(&r->dbr_months, tv, rounding);
223
224 data = dbrrd_closest(tv, dbrrd_closest(tv, dd, dm), dy);
225
226 return (data == NULL ? 0 : data->rrdd_txg);
227 }
228