xref: /linux/fs/ext4/hash.c (revision 63307d015b91e626c97bb82e88054af3d0b74643)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/ext4/hash.c
4  *
5  * Copyright (C) 2002 by Theodore Ts'o
6  */
7 
8 #include <linux/fs.h>
9 #include <linux/unicode.h>
10 #include <linux/compiler.h>
11 #include <linux/bitops.h>
12 #include "ext4.h"
13 
14 #define DELTA 0x9E3779B9
15 
16 static void TEA_transform(__u32 buf[4], __u32 const in[])
17 {
18 	__u32	sum = 0;
19 	__u32	b0 = buf[0], b1 = buf[1];
20 	__u32	a = in[0], b = in[1], c = in[2], d = in[3];
21 	int	n = 16;
22 
23 	do {
24 		sum += DELTA;
25 		b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
26 		b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
27 	} while (--n);
28 
29 	buf[0] += b0;
30 	buf[1] += b1;
31 }
32 
33 /* F, G and H are basic MD4 functions: selection, majority, parity */
34 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
35 #define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
36 #define H(x, y, z) ((x) ^ (y) ^ (z))
37 
38 /*
39  * The generic round function.  The application is so specific that
40  * we don't bother protecting all the arguments with parens, as is generally
41  * good macro practice, in favor of extra legibility.
42  * Rotation is separate from addition to prevent recomputation
43  */
44 #define ROUND(f, a, b, c, d, x, s)	\
45 	(a += f(b, c, d) + x, a = rol32(a, s))
46 #define K1 0
47 #define K2 013240474631UL
48 #define K3 015666365641UL
49 
50 /*
51  * Basic cut-down MD4 transform.  Returns only 32 bits of result.
52  */
53 static __u32 half_md4_transform(__u32 buf[4], __u32 const in[8])
54 {
55 	__u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
56 
57 	/* Round 1 */
58 	ROUND(F, a, b, c, d, in[0] + K1,  3);
59 	ROUND(F, d, a, b, c, in[1] + K1,  7);
60 	ROUND(F, c, d, a, b, in[2] + K1, 11);
61 	ROUND(F, b, c, d, a, in[3] + K1, 19);
62 	ROUND(F, a, b, c, d, in[4] + K1,  3);
63 	ROUND(F, d, a, b, c, in[5] + K1,  7);
64 	ROUND(F, c, d, a, b, in[6] + K1, 11);
65 	ROUND(F, b, c, d, a, in[7] + K1, 19);
66 
67 	/* Round 2 */
68 	ROUND(G, a, b, c, d, in[1] + K2,  3);
69 	ROUND(G, d, a, b, c, in[3] + K2,  5);
70 	ROUND(G, c, d, a, b, in[5] + K2,  9);
71 	ROUND(G, b, c, d, a, in[7] + K2, 13);
72 	ROUND(G, a, b, c, d, in[0] + K2,  3);
73 	ROUND(G, d, a, b, c, in[2] + K2,  5);
74 	ROUND(G, c, d, a, b, in[4] + K2,  9);
75 	ROUND(G, b, c, d, a, in[6] + K2, 13);
76 
77 	/* Round 3 */
78 	ROUND(H, a, b, c, d, in[3] + K3,  3);
79 	ROUND(H, d, a, b, c, in[7] + K3,  9);
80 	ROUND(H, c, d, a, b, in[2] + K3, 11);
81 	ROUND(H, b, c, d, a, in[6] + K3, 15);
82 	ROUND(H, a, b, c, d, in[1] + K3,  3);
83 	ROUND(H, d, a, b, c, in[5] + K3,  9);
84 	ROUND(H, c, d, a, b, in[0] + K3, 11);
85 	ROUND(H, b, c, d, a, in[4] + K3, 15);
86 
87 	buf[0] += a;
88 	buf[1] += b;
89 	buf[2] += c;
90 	buf[3] += d;
91 
92 	return buf[1]; /* "most hashed" word */
93 }
94 #undef ROUND
95 #undef K1
96 #undef K2
97 #undef K3
98 #undef F
99 #undef G
100 #undef H
101 
102 /* The old legacy hash */
103 static __u32 dx_hack_hash_unsigned(const char *name, int len)
104 {
105 	__u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
106 	const unsigned char *ucp = (const unsigned char *) name;
107 
108 	while (len--) {
109 		hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
110 
111 		if (hash & 0x80000000)
112 			hash -= 0x7fffffff;
113 		hash1 = hash0;
114 		hash0 = hash;
115 	}
116 	return hash0 << 1;
117 }
118 
119 static __u32 dx_hack_hash_signed(const char *name, int len)
120 {
121 	__u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
122 	const signed char *scp = (const signed char *) name;
123 
124 	while (len--) {
125 		hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
126 
127 		if (hash & 0x80000000)
128 			hash -= 0x7fffffff;
129 		hash1 = hash0;
130 		hash0 = hash;
131 	}
132 	return hash0 << 1;
133 }
134 
135 static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
136 {
137 	__u32	pad, val;
138 	int	i;
139 	const signed char *scp = (const signed char *) msg;
140 
141 	pad = (__u32)len | ((__u32)len << 8);
142 	pad |= pad << 16;
143 
144 	val = pad;
145 	if (len > num*4)
146 		len = num * 4;
147 	for (i = 0; i < len; i++) {
148 		val = ((int) scp[i]) + (val << 8);
149 		if ((i % 4) == 3) {
150 			*buf++ = val;
151 			val = pad;
152 			num--;
153 		}
154 	}
155 	if (--num >= 0)
156 		*buf++ = val;
157 	while (--num >= 0)
158 		*buf++ = pad;
159 }
160 
161 static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
162 {
163 	__u32	pad, val;
164 	int	i;
165 	const unsigned char *ucp = (const unsigned char *) msg;
166 
167 	pad = (__u32)len | ((__u32)len << 8);
168 	pad |= pad << 16;
169 
170 	val = pad;
171 	if (len > num*4)
172 		len = num * 4;
173 	for (i = 0; i < len; i++) {
174 		val = ((int) ucp[i]) + (val << 8);
175 		if ((i % 4) == 3) {
176 			*buf++ = val;
177 			val = pad;
178 			num--;
179 		}
180 	}
181 	if (--num >= 0)
182 		*buf++ = val;
183 	while (--num >= 0)
184 		*buf++ = pad;
185 }
186 
187 /*
188  * Returns the hash of a filename.  If len is 0 and name is NULL, then
189  * this function can be used to test whether or not a hash version is
190  * supported.
191  *
192  * The seed is an 4 longword (32 bits) "secret" which can be used to
193  * uniquify a hash.  If the seed is all zero's, then some default seed
194  * may be used.
195  *
196  * A particular hash version specifies whether or not the seed is
197  * represented, and whether or not the returned hash is 32 bits or 64
198  * bits.  32 bit hashes will return 0 for the minor hash.
199  */
200 static int __ext4fs_dirhash(const char *name, int len,
201 			    struct dx_hash_info *hinfo)
202 {
203 	__u32	hash;
204 	__u32	minor_hash = 0;
205 	const char	*p;
206 	int		i;
207 	__u32		in[8], buf[4];
208 	void		(*str2hashbuf)(const char *, int, __u32 *, int) =
209 				str2hashbuf_signed;
210 
211 	/* Initialize the default seed for the hash checksum functions */
212 	buf[0] = 0x67452301;
213 	buf[1] = 0xefcdab89;
214 	buf[2] = 0x98badcfe;
215 	buf[3] = 0x10325476;
216 
217 	/* Check to see if the seed is all zero's */
218 	if (hinfo->seed) {
219 		for (i = 0; i < 4; i++) {
220 			if (hinfo->seed[i]) {
221 				memcpy(buf, hinfo->seed, sizeof(buf));
222 				break;
223 			}
224 		}
225 	}
226 
227 	switch (hinfo->hash_version) {
228 	case DX_HASH_LEGACY_UNSIGNED:
229 		hash = dx_hack_hash_unsigned(name, len);
230 		break;
231 	case DX_HASH_LEGACY:
232 		hash = dx_hack_hash_signed(name, len);
233 		break;
234 	case DX_HASH_HALF_MD4_UNSIGNED:
235 		str2hashbuf = str2hashbuf_unsigned;
236 		/* fall through */
237 	case DX_HASH_HALF_MD4:
238 		p = name;
239 		while (len > 0) {
240 			(*str2hashbuf)(p, len, in, 8);
241 			half_md4_transform(buf, in);
242 			len -= 32;
243 			p += 32;
244 		}
245 		minor_hash = buf[2];
246 		hash = buf[1];
247 		break;
248 	case DX_HASH_TEA_UNSIGNED:
249 		str2hashbuf = str2hashbuf_unsigned;
250 		/* fall through */
251 	case DX_HASH_TEA:
252 		p = name;
253 		while (len > 0) {
254 			(*str2hashbuf)(p, len, in, 4);
255 			TEA_transform(buf, in);
256 			len -= 16;
257 			p += 16;
258 		}
259 		hash = buf[0];
260 		minor_hash = buf[1];
261 		break;
262 	default:
263 		hinfo->hash = 0;
264 		return -1;
265 	}
266 	hash = hash & ~1;
267 	if (hash == (EXT4_HTREE_EOF_32BIT << 1))
268 		hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
269 	hinfo->hash = hash;
270 	hinfo->minor_hash = minor_hash;
271 	return 0;
272 }
273 
274 int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
275 		   struct dx_hash_info *hinfo)
276 {
277 #ifdef CONFIG_UNICODE
278 	const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
279 	int r, dlen;
280 	unsigned char *buff;
281 	struct qstr qstr = {.name = name, .len = len };
282 
283 	if (len && IS_CASEFOLDED(dir)) {
284 		buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
285 		if (!buff)
286 			return -ENOMEM;
287 
288 		dlen = utf8_casefold(um, &qstr, buff, PATH_MAX);
289 		if (dlen < 0) {
290 			kfree(buff);
291 			goto opaque_seq;
292 		}
293 
294 		r = __ext4fs_dirhash(buff, dlen, hinfo);
295 
296 		kfree(buff);
297 		return r;
298 	}
299 opaque_seq:
300 #endif
301 	return __ext4fs_dirhash(name, len, hinfo);
302 }
303