1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 2024 Klara Inc. 24 * 25 * This software was developed by 26 * Mariusz Zaborski <mariusz.zaborski@klarasystems.com> 27 * Fred Weigel <fred.weigel@klarasystems.com> 28 * under sponsorship from Wasabi Technology, Inc. and Klara Inc. 29 */ 30 /* 31 * This file implements a round-robin database that stores timestamps and txg 32 * numbers. Due to limited space, we use a round-robin approach, where 33 * the oldest records are overwritten when there is no longer enough room. 34 * This is a best-effort mechanism, and the database should be treated as 35 * an approximation. Consider this before consuming it. 36 * 37 * The database is linear, meaning we assume each new entry is newer than the 38 * ones already stored. Because of this, if time is manipulated, the database 39 * will only accept records that are newer than the existing ones. 40 * (For example, jumping 10 years into the future and then back can lead to 41 * situation when for 10 years we wont write anything to database) 42 * 43 * All times stored in the database use UTC, which makes it easy to convert to 44 * and from local time. 45 * 46 * Each database holds 256 records (as defined in the `RRD_MAX_ENTRIES` macro). 47 * This limit comes from the maximum size of a ZAP object, where we store the 48 * binary blob. 49 * 50 * We've split the database into three smaller ones. 51 * The `minute database` provides high resolution (default: every 10 minutes), 52 * but only covers approximately 1.5 days. This gives a detailed view of recent 53 * activity, useful, for example, when performing a scrub of the last hour. 54 * The `daily database` records one txg per day. With 256 entries, it retains 55 * roughly 8 months of data. This allows users to scrub or analyze txgs across 56 * a range of days. 57 * The `monthly database` stores one record per month, giving approximately 58 * 21 years of history. 59 * All these calculations assume the worst-case scenario: the pool is always 60 * online and actively written to. 61 * 62 * A potential source of confusion is that the database does not store data 63 * while the pool is offline, leading to potential gaps in timeline. Also, 64 * the database contains no records from before this feature was enabled. 65 * Both, upon reflection, are expected. 66 */ 67 #include <sys/zfs_context.h> 68 69 #include "zfs_crrd.h" 70 71 rrd_data_t * 72 rrd_tail_entry(rrd_t *rrd) 73 { 74 size_t n; 75 76 if (rrd_len(rrd) == 0) 77 return (NULL); 78 79 if (rrd->rrd_tail == 0) 80 n = RRD_MAX_ENTRIES - 1; 81 else 82 n = rrd->rrd_tail - 1; 83 84 return (&rrd->rrd_entries[n]); 85 } 86 87 uint64_t 88 rrd_tail(rrd_t *rrd) 89 { 90 const rrd_data_t *tail; 91 92 tail = rrd_tail_entry(rrd); 93 94 return (tail == NULL ? 0 : tail->rrdd_time); 95 } 96 97 /* 98 * Return length of data in the rrd. 99 * rrd_get works from 0..rrd_len()-1. 100 */ 101 size_t 102 rrd_len(rrd_t *rrd) 103 { 104 105 return (rrd->rrd_length); 106 } 107 108 const rrd_data_t * 109 rrd_entry(rrd_t *rrd, size_t i) 110 { 111 size_t n; 112 113 if (i >= rrd_len(rrd)) { 114 return (0); 115 } 116 117 n = (rrd->rrd_head + i) % RRD_MAX_ENTRIES; 118 return (&rrd->rrd_entries[n]); 119 } 120 121 uint64_t 122 rrd_get(rrd_t *rrd, size_t i) 123 { 124 const rrd_data_t *data = rrd_entry(rrd, i); 125 126 return (data == NULL ? 0 : data->rrdd_txg); 127 } 128 129 /* Add value to database. */ 130 void 131 rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg) 132 { 133 rrd_data_t *tail; 134 135 tail = rrd_tail_entry(rrd); 136 if (tail != NULL && tail->rrdd_time == time) { 137 if (tail->rrdd_txg < txg) { 138 tail->rrdd_txg = txg; 139 } else { 140 return; 141 } 142 } 143 144 rrd->rrd_entries[rrd->rrd_tail].rrdd_time = time; 145 rrd->rrd_entries[rrd->rrd_tail].rrdd_txg = txg; 146 147 rrd->rrd_tail = (rrd->rrd_tail + 1) % RRD_MAX_ENTRIES; 148 149 if (rrd->rrd_length < RRD_MAX_ENTRIES) { 150 rrd->rrd_length++; 151 } else { 152 rrd->rrd_head = (rrd->rrd_head + 1) % RRD_MAX_ENTRIES; 153 } 154 } 155 156 void 157 dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg) 158 { 159 hrtime_t daydiff, monthdiff, minutedif; 160 161 minutedif = time - rrd_tail(&db->dbr_minutes); 162 daydiff = time - rrd_tail(&db->dbr_days); 163 monthdiff = time - rrd_tail(&db->dbr_months); 164 165 if (monthdiff >= 0 && monthdiff >= SEC2NSEC(30 * 24 * 60 * 60)) 166 rrd_add(&db->dbr_months, time, txg); 167 else if (daydiff >= 0 && daydiff >= SEC2NSEC(24 * 60 * 60)) 168 rrd_add(&db->dbr_days, time, txg); 169 else if (minutedif >= 0) 170 rrd_add(&db->dbr_minutes, time, txg); 171 } 172 173 /* 174 * We could do a binary search here, but the routine isn't frequently 175 * called and the data is small so we stick to a simple loop. 176 */ 177 static const rrd_data_t * 178 rrd_query(rrd_t *rrd, hrtime_t tv, dbrrd_rounding_t rounding) 179 { 180 const rrd_data_t *data = NULL; 181 182 for (size_t i = 0; i < rrd_len(rrd); i++) { 183 const rrd_data_t *cur = rrd_entry(rrd, i); 184 185 if (rounding == DBRRD_FLOOR) { 186 if (tv < cur->rrdd_time) { 187 break; 188 } 189 data = cur; 190 } else { 191 /* DBRRD_CEILING */ 192 if (tv <= cur->rrdd_time) { 193 data = cur; 194 break; 195 } 196 } 197 } 198 199 return (data); 200 } 201 202 static const rrd_data_t * 203 dbrrd_closest(hrtime_t tv, const rrd_data_t *r1, const rrd_data_t *r2) 204 { 205 206 if (r1 == NULL) 207 return (r2); 208 if (r2 == NULL) 209 return (r1); 210 211 return (ABS(tv - r1->rrdd_time) < ABS(tv - r2->rrdd_time) ? r1 : r2); 212 } 213 214 uint64_t 215 dbrrd_query(dbrrd_t *r, hrtime_t tv, dbrrd_rounding_t rounding) 216 { 217 const rrd_data_t *data, *dm, *dd, *dy; 218 219 data = NULL; 220 dm = rrd_query(&r->dbr_minutes, tv, rounding); 221 dd = rrd_query(&r->dbr_days, tv, rounding); 222 dy = rrd_query(&r->dbr_months, tv, rounding); 223 224 data = dbrrd_closest(tv, dbrrd_closest(tv, dd, dm), dy); 225 226 return (data == NULL ? 0 : data->rrdd_txg); 227 } 228