xref: /freebsd/sys/fs/ext2fs/ext2_htree.c (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2010, 2012 Zheng Liu <lz@freebsd.org>
5  * Copyright (c) 2012, Vyacheslav Matyushin
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/endian.h>
32 #include <sys/systm.h>
33 #include <sys/namei.h>
34 #include <sys/bio.h>
35 #include <sys/buf.h>
36 #include <sys/endian.h>
37 #include <sys/mount.h>
38 #include <sys/vnode.h>
39 #include <sys/malloc.h>
40 #include <sys/dirent.h>
41 #include <sys/sdt.h>
42 #include <sys/sysctl.h>
43 
44 #include <ufs/ufs/dir.h>
45 
46 #include <fs/ext2fs/fs.h>
47 #include <fs/ext2fs/inode.h>
48 #include <fs/ext2fs/ext2_mount.h>
49 #include <fs/ext2fs/ext2fs.h>
50 #include <fs/ext2fs/fs.h>
51 #include <fs/ext2fs/ext2_extern.h>
52 #include <fs/ext2fs/ext2_dinode.h>
53 #include <fs/ext2fs/ext2_dir.h>
54 #include <fs/ext2fs/htree.h>
55 
56 SDT_PROVIDER_DECLARE(ext2fs);
57 /*
58  * ext2fs trace probe:
59  * arg0: verbosity. Higher numbers give more verbose messages
60  * arg1: Textual message
61  */
62 SDT_PROBE_DEFINE2(ext2fs, , trace, htree, "int", "char*");
63 
/* Raw accessors for the little-endian fields of an index entry. */
static uint32_t	ext2_htree_get_block(struct ext2fs_htree_entry *ep);
static uint16_t	ext2_htree_get_count(struct ext2fs_htree_entry *ep);
static uint32_t	ext2_htree_get_hash(struct ext2fs_htree_entry *ep);
static uint16_t	ext2_htree_get_limit(struct ext2fs_htree_entry *ep);
static void	ext2_htree_set_block(struct ext2fs_htree_entry *ep,
		    uint32_t blk);
static void	ext2_htree_set_count(struct ext2fs_htree_entry *ep,
		    uint16_t cnt);
static void	ext2_htree_set_hash(struct ext2fs_htree_entry *ep,
		    uint32_t hash);
static void	ext2_htree_set_limit(struct ext2fs_htree_entry *ep,
		    uint16_t limit);

/* Capacity of the root block and of an interior index node. */
static uint32_t	ext2_htree_node_limit(struct inode *ip);
static uint32_t	ext2_htree_root_limit(struct inode *ip, int len);

/* Index traversal and buffer bookkeeping. */
static int	ext2_htree_check_next(struct inode *ip, uint32_t hash,
		    const char *name, struct ext2fs_htree_lookup_info *info);
static int	ext2_htree_find_leaf(struct inode *ip, const char *name,
		    int namelen, uint32_t *hash, uint8_t *hash_version,
		    struct ext2fs_htree_lookup_info *info);
static void	ext2_htree_release(struct ext2fs_htree_lookup_info *info);
static int	ext2_htree_writebuf(struct inode *ip,
		    struct ext2fs_htree_lookup_info *info);

/* Index and directory block modification. */
static void	ext2_append_entry(char *block, uint32_t blksize,
		    struct ext2fs_direct_2 *last_entry,
		    struct ext2fs_direct_2 *new_entry, int csum_size);
static int	ext2_htree_append_block(struct vnode *vp, char *data,
		    struct componentname *cnp, uint32_t blksize);
static int	ext2_htree_cmp_sort_entry(const void *e1, const void *e2);
static void	ext2_htree_insert_entry_to_level(
		    struct ext2fs_htree_lookup_level *level,
		    uint32_t hash, uint32_t blk);
static void	ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info,
		    uint32_t hash, uint32_t blk);
static int	ext2_htree_split_dirblock(struct inode *ip,
		    char *block1, char *block2, uint32_t blksize,
		    uint32_t *hash_seed, uint8_t hash_version,
		    uint32_t *split_hash, struct ext2fs_direct_2 *entry);
100 
101 int
102 ext2_htree_has_idx(struct inode *ip)
103 {
104 	if (EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_DIRHASHINDEX) &&
105 	    ip->i_flag & IN_E3INDEX)
106 		return (1);
107 	else
108 		return (0);
109 }
110 
111 static int
112 ext2_htree_check_next(struct inode *ip, uint32_t hash, const char *name,
113     struct ext2fs_htree_lookup_info *info)
114 {
115 	struct vnode *vp = ITOV(ip);
116 	struct ext2fs_htree_lookup_level *level;
117 	struct buf *bp;
118 	uint32_t next_hash;
119 	int idx = info->h_levels_num - 1;
120 	int levels = 0;
121 
122 	do {
123 		level = &info->h_levels[idx];
124 		level->h_entry++;
125 		if (level->h_entry < level->h_entries +
126 		    ext2_htree_get_count(level->h_entries))
127 			break;
128 		if (idx == 0)
129 			return (0);
130 		idx--;
131 		levels++;
132 	} while (1);
133 
134 	next_hash = ext2_htree_get_hash(level->h_entry);
135 	if ((hash & 1) == 0) {
136 		if (hash != (next_hash & ~1))
137 			return (0);
138 	}
139 
140 	while (levels > 0) {
141 		levels--;
142 		if (ext2_blkatoff(vp, ext2_htree_get_block(level->h_entry) *
143 		    ip->i_e2fs->e2fs_bsize, NULL, &bp) != 0)
144 			return (0);
145 		level = &info->h_levels[idx + 1];
146 		brelse(level->h_bp);
147 		level->h_bp = bp;
148 		level->h_entry = level->h_entries =
149 		    ((struct ext2fs_htree_node *)bp->b_data)->h_entries;
150 	}
151 
152 	return (1);
153 }
154 
155 static uint32_t
156 ext2_htree_get_block(struct ext2fs_htree_entry *ep)
157 {
158 	return (le32toh(ep->h_blk) & 0x00FFFFFF);
159 }
160 
161 static void
162 ext2_htree_set_block(struct ext2fs_htree_entry *ep, uint32_t blk)
163 {
164 	ep->h_blk = htole32(blk);
165 }
166 
167 static uint16_t
168 ext2_htree_get_count(struct ext2fs_htree_entry *ep)
169 {
170 	return (le16toh(((struct ext2fs_htree_count *)(ep))->h_entries_num));
171 }
172 
173 static void
174 ext2_htree_set_count(struct ext2fs_htree_entry *ep, uint16_t cnt)
175 {
176 	((struct ext2fs_htree_count *)(ep))->h_entries_num = htole16(cnt);
177 }
178 
179 static uint32_t
180 ext2_htree_get_hash(struct ext2fs_htree_entry *ep)
181 {
182 	return (le32toh(ep->h_hash));
183 }
184 
185 static uint16_t
186 ext2_htree_get_limit(struct ext2fs_htree_entry *ep)
187 {
188 	return (le16toh(((struct ext2fs_htree_count *)(ep))->h_entries_max));
189 }
190 
191 static void
192 ext2_htree_set_hash(struct ext2fs_htree_entry *ep, uint32_t hash)
193 {
194 	ep->h_hash = htole32(hash);
195 }
196 
197 static void
198 ext2_htree_set_limit(struct ext2fs_htree_entry *ep, uint16_t limit)
199 {
200 	((struct ext2fs_htree_count *)(ep))->h_entries_max = htole16(limit);
201 }
202 
203 static void
204 ext2_htree_release(struct ext2fs_htree_lookup_info *info)
205 {
206 	u_int i;
207 
208 	for (i = 0; i < info->h_levels_num; i++) {
209 		struct buf *bp = info->h_levels[i].h_bp;
210 
211 		if (bp != NULL)
212 			brelse(bp);
213 	}
214 }
215 
216 static uint32_t
217 ext2_htree_root_limit(struct inode *ip, int len)
218 {
219 	struct m_ext2fs *fs;
220 	uint32_t space;
221 
222 	fs = ip->i_e2fs;
223 	space = ip->i_e2fs->e2fs_bsize - EXT2_DIR_REC_LEN(1) -
224 	    EXT2_DIR_REC_LEN(2) - len;
225 
226 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
227 		space -= sizeof(struct ext2fs_htree_tail);
228 
229 	return (space / sizeof(struct ext2fs_htree_entry));
230 }
231 
232 static uint32_t
233 ext2_htree_node_limit(struct inode *ip)
234 {
235 	struct m_ext2fs *fs;
236 	uint32_t space;
237 
238 	fs = ip->i_e2fs;
239 	space = fs->e2fs_bsize - EXT2_DIR_REC_LEN(0);
240 
241 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
242 		space -= sizeof(struct ext2fs_htree_tail);
243 
244 	return (space / sizeof(struct ext2fs_htree_entry));
245 }
246 
247 static void
248 ext2_get_hash_seed(struct ext2fs* es, uint32_t* seed)
249 {
250 
251 	for (int i = 0; i < 4; i++)
252 		seed[i] = le32toh(es->e3fs_hash_seed[i]);
253 }
254 
255 static int
256 ext2_htree_find_leaf(struct inode *ip, const char *name, int namelen,
257     uint32_t *hash, uint8_t *hash_ver,
258     struct ext2fs_htree_lookup_info *info)
259 {
260 	struct vnode *vp;
261 	struct ext2fs *fs;
262 	struct m_ext2fs *m_fs;
263 	struct buf *bp = NULL;
264 	struct ext2fs_htree_root *rootp;
265 	struct ext2fs_htree_entry *entp, *start, *end, *middle, *found;
266 	struct ext2fs_htree_lookup_level *level_info;
267 	uint32_t hash_major = 0, hash_minor = 0;
268 	uint32_t levels, cnt;
269 	uint32_t hash_seed[4];
270 	uint8_t hash_version;
271 
272 	if (name == NULL || info == NULL)
273 		return (-1);
274 
275 	vp = ITOV(ip);
276 	fs = ip->i_e2fs->e2fs;
277 	m_fs = ip->i_e2fs;
278 
279 	if (ext2_blkatoff(vp, 0, NULL, &bp) != 0)
280 		return (-1);
281 
282 	info->h_levels_num = 1;
283 	info->h_levels[0].h_bp = bp;
284 	rootp = (struct ext2fs_htree_root *)bp->b_data;
285 	if (rootp->h_info.h_hash_version != EXT2_HTREE_LEGACY &&
286 	    rootp->h_info.h_hash_version != EXT2_HTREE_HALF_MD4 &&
287 	    rootp->h_info.h_hash_version != EXT2_HTREE_TEA)
288 		goto error;
289 
290 	hash_version = rootp->h_info.h_hash_version;
291 	if (hash_version <= EXT2_HTREE_TEA)
292 		hash_version += m_fs->e2fs_uhash;
293 	*hash_ver = hash_version;
294 
295 	ext2_get_hash_seed(fs, hash_seed);
296 	ext2_htree_hash(name, namelen, hash_seed,
297 	    hash_version, &hash_major, &hash_minor);
298 	*hash = hash_major;
299 
300 	if ((levels = rootp->h_info.h_ind_levels) > 1)
301 		goto error;
302 
303 	entp = (struct ext2fs_htree_entry *)(((char *)&rootp->h_info) +
304 	    rootp->h_info.h_info_len);
305 
306 	if (ext2_htree_get_limit(entp) !=
307 	    ext2_htree_root_limit(ip, rootp->h_info.h_info_len))
308 		goto error;
309 
310 	while (1) {
311 		cnt = ext2_htree_get_count(entp);
312 		if (cnt == 0 || cnt > ext2_htree_get_limit(entp))
313 			goto error;
314 
315 		start = entp + 1;
316 		end = entp + cnt - 1;
317 		while (start <= end) {
318 			middle = start + (end - start) / 2;
319 			if (ext2_htree_get_hash(middle) > hash_major)
320 				end = middle - 1;
321 			else
322 				start = middle + 1;
323 		}
324 		found = start - 1;
325 
326 		level_info = &(info->h_levels[info->h_levels_num - 1]);
327 		level_info->h_bp = bp;
328 		level_info->h_entries = entp;
329 		level_info->h_entry = found;
330 		if (levels == 0)
331 			return (0);
332 		levels--;
333 		if (ext2_blkatoff(vp,
334 		    ext2_htree_get_block(found) * m_fs->e2fs_bsize,
335 		    NULL, &bp) != 0)
336 			goto error;
337 		entp = ((struct ext2fs_htree_node *)bp->b_data)->h_entries;
338 		info->h_levels_num++;
339 		info->h_levels[info->h_levels_num - 1].h_bp = bp;
340 	}
341 
342 error:
343 	ext2_htree_release(info);
344 	return (-1);
345 }
346 
347 /*
348  * Try to lookup a directory entry in HTree index
349  */
350 int
351 ext2_htree_lookup(struct inode *ip, const char *name, int namelen,
352     struct buf **bpp, int *entryoffp, doff_t *offp,
353     doff_t *prevoffp, doff_t *endusefulp,
354     struct ext2fs_searchslot *ss)
355 {
356 	struct vnode *vp;
357 	struct ext2fs_htree_lookup_info info;
358 	struct ext2fs_htree_entry *leaf_node;
359 	struct m_ext2fs *m_fs;
360 	struct buf *bp;
361 	uint32_t blk;
362 	uint32_t dirhash;
363 	uint32_t bsize;
364 	uint8_t hash_version;
365 	int search_next;
366 	int found = 0;
367 
368 	m_fs = ip->i_e2fs;
369 	bsize = m_fs->e2fs_bsize;
370 	vp = ITOV(ip);
371 
372 	/* TODO: print error msg because we don't lookup '.' and '..' */
373 
374 	memset(&info, 0, sizeof(info));
375 	if (ext2_htree_find_leaf(ip, name, namelen, &dirhash,
376 	    &hash_version, &info))
377 		return (-1);
378 
379 	do {
380 		leaf_node = info.h_levels[info.h_levels_num - 1].h_entry;
381 		blk = ext2_htree_get_block(leaf_node);
382 		if (ext2_blkatoff(vp, blk * bsize, NULL, &bp) != 0) {
383 			ext2_htree_release(&info);
384 			return (-1);
385 		}
386 
387 		*offp = blk * bsize;
388 		*entryoffp = 0;
389 		*prevoffp = blk * bsize;
390 		*endusefulp = blk * bsize;
391 
392 		if (ss->slotstatus == NONE) {
393 			ss->slotoffset = -1;
394 			ss->slotfreespace = 0;
395 		}
396 
397 		if (ext2_search_dirblock(ip, bp->b_data, &found,
398 		    name, namelen, entryoffp, offp, prevoffp,
399 		    endusefulp, ss) != 0) {
400 			brelse(bp);
401 			ext2_htree_release(&info);
402 			return (-1);
403 		}
404 
405 		if (found) {
406 			*bpp = bp;
407 			ext2_htree_release(&info);
408 			return (0);
409 		}
410 
411 		brelse(bp);
412 		search_next = ext2_htree_check_next(ip, dirhash, name, &info);
413 	} while (search_next);
414 
415 	ext2_htree_release(&info);
416 	return (ENOENT);
417 }
418 
419 static int
420 ext2_htree_append_block(struct vnode *vp, char *data,
421     struct componentname *cnp, uint32_t blksize)
422 {
423 	struct iovec aiov;
424 	struct uio auio;
425 	struct inode *dp = VTOI(vp);
426 	uint64_t cursize, newsize;
427 	int error;
428 
429 	cursize = roundup(dp->i_size, blksize);
430 	newsize = cursize + blksize;
431 
432 	auio.uio_offset = cursize;
433 	auio.uio_resid = blksize;
434 	aiov.iov_len = blksize;
435 	aiov.iov_base = data;
436 	auio.uio_iov = &aiov;
437 	auio.uio_iovcnt = 1;
438 	auio.uio_rw = UIO_WRITE;
439 	auio.uio_segflg = UIO_SYSSPACE;
440 	auio.uio_td = NULL;
441 	error = VOP_WRITE(vp, &auio, IO_SYNC, cnp->cn_cred);
442 	if (!error)
443 		dp->i_size = newsize;
444 
445 	return (error);
446 }
447 
448 static int
449 ext2_htree_writebuf(struct inode* ip, struct ext2fs_htree_lookup_info *info)
450 {
451 	int i, error;
452 
453 	for (i = 0; i < info->h_levels_num; i++) {
454 		struct buf *bp = info->h_levels[i].h_bp;
455 		ext2_dx_csum_set(ip, (struct ext2fs_direct_2 *)bp->b_data);
456 		error = bwrite(bp);
457 		if (error)
458 			return (error);
459 	}
460 
461 	return (0);
462 }
463 
464 static void
465 ext2_htree_insert_entry_to_level(struct ext2fs_htree_lookup_level *level,
466     uint32_t hash, uint32_t blk)
467 {
468 	struct ext2fs_htree_entry *target;
469 	int entries_num;
470 
471 	target = level->h_entry + 1;
472 	entries_num = ext2_htree_get_count(level->h_entries);
473 
474 	memmove(target + 1, target, (char *)(level->h_entries + entries_num) -
475 	    (char *)target);
476 	ext2_htree_set_block(target, blk);
477 	ext2_htree_set_hash(target, hash);
478 	ext2_htree_set_count(level->h_entries, entries_num + 1);
479 }
480 
481 /*
482  * Insert an index entry to the index node.
483  */
484 static void
485 ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info,
486     uint32_t hash, uint32_t blk)
487 {
488 	struct ext2fs_htree_lookup_level *level;
489 
490 	level = &info->h_levels[info->h_levels_num - 1];
491 	ext2_htree_insert_entry_to_level(level, hash, blk);
492 }
493 
494 /*
495  * Compare two entry sort descriptors by name hash value.
496  * This is used together with qsort.
497  */
498 static int
499 ext2_htree_cmp_sort_entry(const void *e1, const void *e2)
500 {
501 	const struct ext2fs_htree_sort_entry *entry1, *entry2;
502 
503 	entry1 = (const struct ext2fs_htree_sort_entry *)e1;
504 	entry2 = (const struct ext2fs_htree_sort_entry *)e2;
505 
506 	if (le32toh(entry1->h_hash) < le32toh(entry2->h_hash))
507 		return (-1);
508 	if (le32toh(entry1->h_hash) > le32toh(entry2->h_hash))
509 		return (1);
510 	return (0);
511 }
512 
513 /*
514  * Append an entry to the end of the directory block.
515  */
516 static void
517 ext2_append_entry(char *block, uint32_t blksize,
518     struct ext2fs_direct_2 *last_entry,
519     struct ext2fs_direct_2 *new_entry, int csum_size)
520 {
521 	uint16_t entry_len;
522 
523 	entry_len = EXT2_DIR_REC_LEN(last_entry->e2d_namlen);
524 	last_entry->e2d_reclen = htole16(entry_len);
525 	last_entry = (struct ext2fs_direct_2 *)((char *)last_entry + entry_len);
526 	new_entry->e2d_reclen = htole16(block + blksize - (char *)last_entry -
527 	    csum_size);
528 	memcpy(last_entry, new_entry, EXT2_DIR_REC_LEN(new_entry->e2d_namlen));
529 }
530 
531 /*
532  * Move half of entries from the old directory block to the new one.
533  */
534 static int
535 ext2_htree_split_dirblock(struct inode *ip, char *block1, char *block2,
536     uint32_t blksize, uint32_t *hash_seed, uint8_t hash_version,
537     uint32_t *split_hash, struct ext2fs_direct_2 *entry)
538 {
539 	struct m_ext2fs *fs;
540 	int entry_cnt = 0;
541 	int size = 0, csum_size = 0;
542 	int i, k;
543 	uint32_t offset;
544 	uint16_t entry_len = 0;
545 	uint32_t entry_hash;
546 	struct ext2fs_direct_2 *ep, *last;
547 	char *dest;
548 	struct ext2fs_htree_sort_entry *sort_info;
549 
550 	fs = ip->i_e2fs;
551 	ep = (struct ext2fs_direct_2 *)block1;
552 	dest = block2;
553 	sort_info = (struct ext2fs_htree_sort_entry *)
554 	    ((char *)block2 + blksize);
555 
556 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
557 		csum_size = sizeof(struct ext2fs_direct_tail);
558 
559 	/*
560 	 * Calculate name hash value for the entry which is to be added.
561 	 */
562 	ext2_htree_hash(entry->e2d_name, entry->e2d_namlen, hash_seed,
563 	    hash_version, &entry_hash, NULL);
564 
565 	/*
566 	 * Fill in directory entry sort descriptors.
567 	 */
568 	while ((char *)ep < block1 + blksize - csum_size) {
569 		if (le32toh(ep->e2d_ino) && ep->e2d_namlen) {
570 			entry_cnt++;
571 			sort_info--;
572 			sort_info->h_size = ep->e2d_reclen;
573 			sort_info->h_offset = htole16((char *)ep - block1);
574 			ext2_htree_hash(ep->e2d_name, ep->e2d_namlen,
575 			    hash_seed, hash_version,
576 			    &sort_info->h_hash, NULL);
577 			sort_info->h_hash = htole32(sort_info->h_hash);
578 		}
579 		ep = (struct ext2fs_direct_2 *)
580 		    ((char *)ep + le16toh(ep->e2d_reclen));
581 	}
582 
583 	/*
584 	 * Sort directory entry descriptors by name hash value.
585 	 */
586 	qsort(sort_info, entry_cnt, sizeof(struct ext2fs_htree_sort_entry),
587 	    ext2_htree_cmp_sort_entry);
588 
589 	/*
590 	 * Count the number of entries to move to directory block 2.
591 	 */
592 	for (i = entry_cnt - 1; i >= 0; i--) {
593 		if (le16toh(sort_info[i].h_size) + size > blksize / 2)
594 			break;
595 		size += le16toh(sort_info[i].h_size);
596 	}
597 
598 	*split_hash = le32toh(sort_info[i + 1].h_hash);
599 
600 	/*
601 	 * Set collision bit.
602 	 */
603 	if (*split_hash == le32toh(sort_info[i].h_hash))
604 		*split_hash += 1;
605 
606 	/*
607 	 * Move half of directory entries from block 1 to block 2.
608 	 */
609 	for (k = i + 1; k < entry_cnt; k++) {
610 		ep = (struct ext2fs_direct_2 *)((char *)block1 +
611 		    le16toh(sort_info[k].h_offset));
612 		entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
613 		memcpy(dest, ep, entry_len);
614 		((struct ext2fs_direct_2 *)dest)->e2d_reclen =
615 		    htole16(entry_len);
616 		/* Mark directory entry as unused. */
617 		ep->e2d_ino = 0;
618 		dest += entry_len;
619 	}
620 	dest -= entry_len;
621 
622 	/* Shrink directory entries in block 1. */
623 	last = (struct ext2fs_direct_2 *)block1;
624 	entry_len = 0;
625 	for (offset = 0; offset < blksize - csum_size; ) {
626 		ep = (struct ext2fs_direct_2 *)(block1 + offset);
627 		offset += le16toh(ep->e2d_reclen);
628 		if (le32toh(ep->e2d_ino)) {
629 			last = (struct ext2fs_direct_2 *)
630 			    ((char *)last + entry_len);
631 			entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
632 			memcpy((void *)last, (void *)ep, entry_len);
633 			last->e2d_reclen = htole16(entry_len);
634 		}
635 	}
636 
637 	if (entry_hash >= *split_hash) {
638 		/* Add entry to block 2. */
639 		ext2_append_entry(block2, blksize,
640 		    (struct ext2fs_direct_2 *)dest, entry, csum_size);
641 
642 		/* Adjust length field of last entry of block 1. */
643 		last->e2d_reclen = htole16(block1 + blksize - (char *)last -
644 		    csum_size);
645 	} else {
646 		/* Add entry to block 1. */
647 		ext2_append_entry(block1, blksize, last, entry, csum_size);
648 
649 		/* Adjust length field of last entry of block 2. */
650 		((struct ext2fs_direct_2 *)dest)->e2d_reclen =
651 		    htole16(block2 + blksize - dest - csum_size);
652 	}
653 
654 	if (csum_size) {
655 		ext2_init_dirent_tail(EXT2_DIRENT_TAIL(block1, blksize));
656 		ext2_init_dirent_tail(EXT2_DIRENT_TAIL(block2, blksize));
657 	}
658 
659 	return (0);
660 }
661 
662 /*
663  * Create an HTree index for a directory
664  */
665 int
666 ext2_htree_create_index(struct vnode *vp, struct componentname *cnp,
667     struct ext2fs_direct_2 *new_entry)
668 {
669 	struct buf *bp = NULL;
670 	struct inode *dp;
671 	struct ext2fs *fs;
672 	struct m_ext2fs *m_fs;
673 	struct ext2fs_direct_2 *ep, *dotdot;
674 	struct ext2fs_htree_root *root;
675 	struct ext2fs_htree_lookup_info info;
676 	uint32_t blksize, dirlen, split_hash;
677 	uint32_t hash_seed[4];
678 	uint8_t hash_version;
679 	char *buf1 = NULL;
680 	char *buf2 = NULL;
681 	int error = 0;
682 
683 	dp = VTOI(vp);
684 	fs = dp->i_e2fs->e2fs;
685 	m_fs = dp->i_e2fs;
686 	blksize = m_fs->e2fs_bsize;
687 
688 	buf1 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
689 	buf2 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
690 
691 	if ((error = ext2_blkatoff(vp, 0, NULL, &bp)) != 0)
692 		goto out;
693 
694 	root = (struct ext2fs_htree_root *)bp->b_data;
695 	dotdot = (struct ext2fs_direct_2 *)((char *)&(root->h_dotdot));
696 	ep = (struct ext2fs_direct_2 *)((char *)dotdot +
697 	    le16toh(dotdot->e2d_reclen));
698 	dirlen = (char *)root + blksize - (char *)ep;
699 	memcpy(buf1, ep, dirlen);
700 	ep = (struct ext2fs_direct_2 *)buf1;
701 	while ((char *)ep < buf1 + dirlen)
702 		ep = (struct ext2fs_direct_2 *)
703 		    ((char *)ep + le16toh(ep->e2d_reclen));
704 	ep->e2d_reclen = htole16(buf1 + blksize - (char *)ep);
705 
706 	dp->i_flag |= IN_E3INDEX;
707 
708 	/*
709 	 * Initialize index root.
710 	 */
711 	dotdot->e2d_reclen = htole16(blksize - EXT2_DIR_REC_LEN(1));
712 	memset(&root->h_info, 0, sizeof(root->h_info));
713 	root->h_info.h_hash_version = fs->e3fs_def_hash_version;
714 	root->h_info.h_info_len = sizeof(root->h_info);
715 	ext2_htree_set_block(root->h_entries, 1);
716 	ext2_htree_set_count(root->h_entries, 1);
717 	ext2_htree_set_limit(root->h_entries,
718 	    ext2_htree_root_limit(dp, sizeof(root->h_info)));
719 
720 	memset(&info, 0, sizeof(info));
721 	info.h_levels_num = 1;
722 	info.h_levels[0].h_entries = root->h_entries;
723 	info.h_levels[0].h_entry = root->h_entries;
724 
725 	hash_version = root->h_info.h_hash_version;
726 	if (hash_version <= EXT2_HTREE_TEA)
727 		hash_version += m_fs->e2fs_uhash;
728 	ext2_get_hash_seed(fs, hash_seed);
729 	ext2_htree_split_dirblock(dp, buf1, buf2, blksize, hash_seed,
730 	    hash_version, &split_hash, new_entry);
731 	ext2_htree_insert_entry(&info, split_hash, 2);
732 
733 	/*
734 	 * Write directory block 0.
735 	 */
736 	ext2_dx_csum_set(dp, (struct ext2fs_direct_2 *)bp->b_data);
737 	if (DOINGASYNC(vp)) {
738 		bdwrite(bp);
739 		error = 0;
740 	} else {
741 		error = bwrite(bp);
742 	}
743 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
744 	if (error)
745 		goto out;
746 
747 	/*
748 	 * Write directory block 1.
749 	 */
750 	ext2_dirent_csum_set(dp, (struct ext2fs_direct_2 *)buf1);
751 	error = ext2_htree_append_block(vp, buf1, cnp, blksize);
752 	if (error)
753 		goto out1;
754 
755 	/*
756 	 * Write directory block 2.
757 	 */
758 	ext2_dirent_csum_set(dp, (struct ext2fs_direct_2 *)buf2);
759 	error = ext2_htree_append_block(vp, buf2, cnp, blksize);
760 
761 	free(buf1, M_TEMP);
762 	free(buf2, M_TEMP);
763 	return (error);
764 out:
765 	if (bp != NULL)
766 		brelse(bp);
767 out1:
768 	free(buf1, M_TEMP);
769 	free(buf2, M_TEMP);
770 	return (error);
771 }
772 
773 /*
774  * Add an entry to the directory using htree index.
775  */
776 int
777 ext2_htree_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry,
778     struct componentname *cnp)
779 {
780 	struct ext2fs_htree_entry *entries, *leaf_node;
781 	struct ext2fs_htree_lookup_info info;
782 	struct buf *bp = NULL;
783 	struct ext2fs *fs;
784 	struct m_ext2fs *m_fs;
785 	struct inode *ip;
786 	uint16_t ent_num;
787 	uint32_t dirhash, split_hash;
788 	uint32_t blksize, blknum;
789 	uint64_t cursize, dirsize;
790 	uint32_t hash_seed[4];
791 	uint8_t hash_version;
792 	char *newdirblock = NULL;
793 	char *newidxblock = NULL;
794 	struct ext2fs_htree_node *dst_node;
795 	struct ext2fs_htree_entry *dst_entries;
796 	struct ext2fs_htree_entry *root_entires;
797 	struct buf *dst_bp = NULL;
798 	int error, write_bp = 0, write_dst_bp = 0, write_info = 0;
799 
800 	ip = VTOI(dvp);
801 	m_fs = ip->i_e2fs;
802 	fs = m_fs->e2fs;
803 	blksize = m_fs->e2fs_bsize;
804 
805 	if (ip->i_count != 0)
806 		return ext2_add_entry(dvp, entry);
807 
808 	/* Target directory block is full, split it */
809 	memset(&info, 0, sizeof(info));
810 	error = ext2_htree_find_leaf(ip, entry->e2d_name, entry->e2d_namlen,
811 	    &dirhash, &hash_version, &info);
812 	if (error)
813 		return (error);
814 
815 	entries = info.h_levels[info.h_levels_num - 1].h_entries;
816 	ent_num = ext2_htree_get_count(entries);
817 	if (ent_num == ext2_htree_get_limit(entries)) {
818 		/* Split the index node. */
819 		root_entires = info.h_levels[0].h_entries;
820 		newidxblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
821 		dst_node = (struct ext2fs_htree_node *)newidxblock;
822 		memset(&dst_node->h_fake_dirent, 0,
823 		    sizeof(dst_node->h_fake_dirent));
824 		dst_node->h_fake_dirent.e2d_reclen = htole16(blksize);
825 
826 		cursize = roundup(ip->i_size, blksize);
827 		dirsize = cursize + blksize;
828 		blknum = dirsize / blksize - 1;
829 		ext2_dx_csum_set(ip, (struct ext2fs_direct_2 *)newidxblock);
830 		error = ext2_htree_append_block(dvp, newidxblock,
831 		    cnp, blksize);
832 		if (error)
833 			goto finish;
834 		error = ext2_blkatoff(dvp, cursize, NULL, &dst_bp);
835 		if (error)
836 			goto finish;
837 		dst_node = (struct ext2fs_htree_node *)dst_bp->b_data;
838 		dst_entries = dst_node->h_entries;
839 
840 		if (info.h_levels_num == 2) {
841 			uint16_t src_ent_num, dst_ent_num;
842 
843 			if (ext2_htree_get_count(root_entires) ==
844 			    ext2_htree_get_limit(root_entires)) {
845 				SDT_PROBE2(ext2fs, , trace, htree, 1,
846 				    "directory index is full");
847 				error = EIO;
848 				goto finish;
849 			}
850 
851 			src_ent_num = ent_num / 2;
852 			dst_ent_num = ent_num - src_ent_num;
853 			split_hash = ext2_htree_get_hash(entries + src_ent_num);
854 
855 			/* Move half of index entries to the new index node */
856 			memcpy(dst_entries, entries + src_ent_num,
857 			    dst_ent_num * sizeof(struct ext2fs_htree_entry));
858 			ext2_htree_set_count(entries, src_ent_num);
859 			ext2_htree_set_count(dst_entries, dst_ent_num);
860 			ext2_htree_set_limit(dst_entries,
861 			    ext2_htree_node_limit(ip));
862 
863 			if (info.h_levels[1].h_entry >= entries + src_ent_num) {
864 				struct buf *tmp = info.h_levels[1].h_bp;
865 
866 				info.h_levels[1].h_bp = dst_bp;
867 				dst_bp = tmp;
868 
869 				info.h_levels[1].h_entry =
870 				    info.h_levels[1].h_entry -
871 				    (entries + src_ent_num) +
872 				    dst_entries;
873 				info.h_levels[1].h_entries = dst_entries;
874 			}
875 			ext2_htree_insert_entry_to_level(&info.h_levels[0],
876 			    split_hash, blknum);
877 
878 			/* Write new index node to disk */
879 			ext2_dx_csum_set(ip,
880 			    (struct ext2fs_direct_2 *)dst_bp->b_data);
881 			error = bwrite(dst_bp);
882 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
883 			if (error)
884 				goto finish;
885 			write_dst_bp = 1;
886 		} else {
887 			/* Create second level for htree index */
888 			struct ext2fs_htree_root *idx_root;
889 
890 			memcpy(dst_entries, entries,
891 			    ent_num * sizeof(struct ext2fs_htree_entry));
892 			ext2_htree_set_limit(dst_entries,
893 			    ext2_htree_node_limit(ip));
894 
895 			idx_root = (struct ext2fs_htree_root *)
896 			    info.h_levels[0].h_bp->b_data;
897 			idx_root->h_info.h_ind_levels = 1;
898 
899 			ext2_htree_set_count(entries, 1);
900 			ext2_htree_set_block(entries, blknum);
901 
902 			info.h_levels_num = 2;
903 			info.h_levels[1].h_entries = dst_entries;
904 			info.h_levels[1].h_entry = info.h_levels[0].h_entry -
905 			    info.h_levels[0].h_entries + dst_entries;
906 			info.h_levels[1].h_bp = dst_bp;
907 			dst_bp = NULL;
908 		}
909 	}
910 
911 	leaf_node = info.h_levels[info.h_levels_num - 1].h_entry;
912 	blknum = ext2_htree_get_block(leaf_node);
913 	error = ext2_blkatoff(dvp, blknum * blksize, NULL, &bp);
914 	if (error)
915 		goto finish;
916 
917 	/* Split target directory block */
918 	newdirblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
919 	ext2_get_hash_seed(fs, hash_seed);
920 	ext2_htree_split_dirblock(ip, (char *)bp->b_data, newdirblock, blksize,
921 	    hash_seed, hash_version, &split_hash, entry);
922 	cursize = roundup(ip->i_size, blksize);
923 	dirsize = cursize + blksize;
924 	blknum = dirsize / blksize - 1;
925 
926 	/* Add index entry for the new directory block */
927 	ext2_htree_insert_entry(&info, split_hash, blknum);
928 
929 	/* Write the new directory block to the end of the directory */
930 	ext2_dirent_csum_set(ip, (struct ext2fs_direct_2 *)newdirblock);
931 	error = ext2_htree_append_block(dvp, newdirblock, cnp, blksize);
932 	if (error)
933 		goto finish;
934 
935 	/* Write the target directory block */
936 	ext2_dirent_csum_set(ip, (struct ext2fs_direct_2 *)bp->b_data);
937 	error = bwrite(bp);
938 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
939 	if (error)
940 		goto finish;
941 	write_bp = 1;
942 
943 	/* Write the index block */
944 	error = ext2_htree_writebuf(ip, &info);
945 	if (!error)
946 		write_info = 1;
947 
948 finish:
949 	if (dst_bp != NULL && !write_dst_bp)
950 		brelse(dst_bp);
951 	if (bp != NULL && !write_bp)
952 		brelse(bp);
953 	if (newdirblock != NULL)
954 		free(newdirblock, M_TEMP);
955 	if (newidxblock != NULL)
956 		free(newidxblock, M_TEMP);
957 	if (!write_info)
958 		ext2_htree_release(&info);
959 	return (error);
960 }
961