xref: /linux/fs/bcachefs/mean_and_variance.c (revision d2a4a07190f42e4f82805daf58e708400b703f1c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Functions for incremental mean and variance.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * Copyright © 2022 Daniel B. Hill
15  *
16  * Author: Daniel B. Hill <daniel@gluo.nz>
17  *
18  * Description:
19  *
20  * This is includes some incremental algorithms for mean and variance calculation
21  *
22  * Derived from the paper: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
23  *
24  * Create a struct and if it's the weighted variant set the w field (weight = 2^k).
25  *
26  * Use mean_and_variance[_weighted]_update() on the struct to update it's state.
27  *
28  * Use the mean_and_variance[_weighted]_get_* functions to calculate the mean and variance, some computation
29  * is deferred to these functions for performance reasons.
30  *
31  * see lib/math/mean_and_variance_test.c for examples of usage.
32  *
33  * DO NOT access the mean and variance fields of the weighted variants directly.
34  * DO NOT change the weight after calling update.
35  */
36 
37 #include <linux/bug.h>
38 #include <linux/compiler.h>
39 #include <linux/export.h>
40 #include <linux/limits.h>
41 #include <linux/math.h>
42 #include <linux/math64.h>
43 #include <linux/module.h>
44 
45 #include "mean_and_variance.h"
46 
47 u128_u u128_div(u128_u n, u64 d)
48 {
49 	u128_u r;
50 	u64 rem;
51 	u64 hi = u128_hi(n);
52 	u64 lo = u128_lo(n);
53 	u64  h =  hi & ((u64) U32_MAX  << 32);
54 	u64  l = (hi &  (u64) U32_MAX) << 32;
55 
56 	r =             u128_shl(u64_to_u128(div64_u64_rem(h,                d, &rem)), 64);
57 	r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l  + (rem << 32), d, &rem)), 32));
58 	r = u128_add(r,          u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem)));
59 	return r;
60 }
61 EXPORT_SYMBOL_GPL(u128_div);
62 
63 /**
64  * mean_and_variance_get_mean() - get mean from @s
65  * @s: mean and variance number of samples and their sums
66  */
67 s64 mean_and_variance_get_mean(struct mean_and_variance s)
68 {
69 	return s.n ? div64_u64(s.sum, s.n) : 0;
70 }
71 EXPORT_SYMBOL_GPL(mean_and_variance_get_mean);
72 
73 /**
74  * mean_and_variance_get_variance() -  get variance from @s1
75  * @s1: mean and variance number of samples and sums
76  *
77  * see linked pdf equation 12.
78  */
79 u64 mean_and_variance_get_variance(struct mean_and_variance s1)
80 {
81 	if (s1.n) {
82 		u128_u s2 = u128_div(s1.sum_squares, s1.n);
83 		u64  s3 = abs(mean_and_variance_get_mean(s1));
84 
85 		return u128_lo(u128_sub(s2, u128_square(s3)));
86 	} else {
87 		return 0;
88 	}
89 }
90 EXPORT_SYMBOL_GPL(mean_and_variance_get_variance);
91 
92 /**
93  * mean_and_variance_get_stddev() - get standard deviation from @s
94  * @s: mean and variance number of samples and their sums
95  */
96 u32 mean_and_variance_get_stddev(struct mean_and_variance s)
97 {
98 	return int_sqrt64(mean_and_variance_get_variance(s));
99 }
100 EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
101 
102 /**
103  * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
104  * @s: mean and variance number of samples and their sums
105  * @x: new value to include in the &mean_and_variance_weighted
106  * @initted: caller must track whether this is the first use or not
107  * @weight: ewma weight
108  *
109  * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
110  * values are stored bitshifted for performance and added precision.
111  */
112 void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s,
113 		s64 x, bool initted, u8 weight)
114 {
115 	// previous weighted variance.
116 	u8 w		= weight;
117 	u64 var_w0	= s->variance;
118 	// new value weighted.
119 	s64 x_w		= x << w;
120 	s64 diff_w	= x_w - s->mean;
121 	s64 diff	= fast_divpow2(diff_w, w);
122 	// new mean weighted.
123 	s64 u_w1	= s->mean + diff;
124 
125 	if (!initted) {
126 		s->mean = x_w;
127 		s->variance = 0;
128 	} else {
129 		s->mean = u_w1;
130 		s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w;
131 	}
132 }
133 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
134 
135 /**
136  * mean_and_variance_weighted_get_mean() - get mean from @s
137  * @s: mean and variance number of samples and their sums
138  * @weight: ewma weight
139  */
140 s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
141 		u8 weight)
142 {
143 	return fast_divpow2(s.mean, weight);
144 }
145 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
146 
147 /**
148  * mean_and_variance_weighted_get_variance() -- get variance from @s
149  * @s: mean and variance number of samples and their sums
150  * @weight: ewma weight
151  */
152 u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
153 		u8 weight)
154 {
155 	// always positive don't need fast divpow2
156 	return s.variance >> weight;
157 }
158 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
159 
160 /**
161  * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
162  * @s: mean and variance number of samples and their sums
163  * @weight: ewma weight
164  */
165 u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
166 		u8 weight)
167 {
168 	return int_sqrt64(mean_and_variance_weighted_get_variance(s, weight));
169 }
170 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev);
171 
172 MODULE_AUTHOR("Daniel B. Hill");
173 MODULE_LICENSE("GPL");
174