xref: /freebsd/sys/fs/ext2fs/ext2_htree.c (revision d5b0e70f7e04d971691517ce1304d86a1e367e2e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010, 2012 Zheng Liu <lz@freebsd.org>
5  * Copyright (c) 2012, Vyacheslav Matyushin
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/param.h>
33 #include <sys/endian.h>
34 #include <sys/systm.h>
35 #include <sys/namei.h>
36 #include <sys/bio.h>
37 #include <sys/buf.h>
38 #include <sys/endian.h>
39 #include <sys/mount.h>
40 #include <sys/vnode.h>
41 #include <sys/malloc.h>
42 #include <sys/dirent.h>
43 #include <sys/sdt.h>
44 #include <sys/sysctl.h>
45 
46 #include <ufs/ufs/dir.h>
47 
48 #include <fs/ext2fs/fs.h>
49 #include <fs/ext2fs/inode.h>
50 #include <fs/ext2fs/ext2_mount.h>
51 #include <fs/ext2fs/ext2fs.h>
52 #include <fs/ext2fs/fs.h>
53 #include <fs/ext2fs/ext2_extern.h>
54 #include <fs/ext2fs/ext2_dinode.h>
55 #include <fs/ext2fs/ext2_dir.h>
56 #include <fs/ext2fs/htree.h>
57 
58 SDT_PROVIDER_DECLARE(ext2fs);
59 /*
60  * ext2fs trace probe:
61  * arg0: verbosity. Higher numbers give more verbose messages
62  * arg1: Textual message
63  */
64 SDT_PROBE_DEFINE2(ext2fs, , trace, htree, "int", "char*");
65 
66 static void	ext2_append_entry(char *block, uint32_t blksize,
67 		    struct ext2fs_direct_2 *last_entry,
68 		    struct ext2fs_direct_2 *new_entry, int csum_size);
69 static int	ext2_htree_append_block(struct vnode *vp, char *data,
70 		    struct componentname *cnp, uint32_t blksize);
71 static int	ext2_htree_check_next(struct inode *ip, uint32_t hash,
72 		    const char *name, struct ext2fs_htree_lookup_info *info);
73 static int	ext2_htree_cmp_sort_entry(const void *e1, const void *e2);
74 static int	ext2_htree_find_leaf(struct inode *ip, const char *name,
75 		    int namelen, uint32_t *hash, uint8_t *hash_version,
76 		    struct ext2fs_htree_lookup_info *info);
77 static uint32_t ext2_htree_get_block(struct ext2fs_htree_entry *ep);
78 static uint16_t	ext2_htree_get_count(struct ext2fs_htree_entry *ep);
79 static uint32_t ext2_htree_get_hash(struct ext2fs_htree_entry *ep);
80 static uint16_t	ext2_htree_get_limit(struct ext2fs_htree_entry *ep);
81 static void	ext2_htree_insert_entry_to_level(struct ext2fs_htree_lookup_level *level,
82 		    uint32_t hash, uint32_t blk);
83 static void	ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info,
84 		    uint32_t hash, uint32_t blk);
85 static uint32_t	ext2_htree_node_limit(struct inode *ip);
86 static void	ext2_htree_set_block(struct ext2fs_htree_entry *ep,
87 		    uint32_t blk);
88 static void	ext2_htree_set_count(struct ext2fs_htree_entry *ep,
89 		    uint16_t cnt);
90 static void	ext2_htree_set_hash(struct ext2fs_htree_entry *ep,
91 		    uint32_t hash);
92 static void	ext2_htree_set_limit(struct ext2fs_htree_entry *ep,
93 		    uint16_t limit);
94 static int	ext2_htree_split_dirblock(struct inode *ip,
95 		    char *block1, char *block2, uint32_t blksize,
96 		    uint32_t *hash_seed, uint8_t hash_version,
97 		    uint32_t *split_hash, struct  ext2fs_direct_2 *entry);
98 static void	ext2_htree_release(struct ext2fs_htree_lookup_info *info);
99 static uint32_t	ext2_htree_root_limit(struct inode *ip, int len);
100 static int	ext2_htree_writebuf(struct inode *ip,
101 		    struct ext2fs_htree_lookup_info *info);
102 
103 int
104 ext2_htree_has_idx(struct inode *ip)
105 {
106 	if (EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_DIRHASHINDEX) &&
107 	    ip->i_flag & IN_E3INDEX)
108 		return (1);
109 	else
110 		return (0);
111 }
112 
113 static int
114 ext2_htree_check_next(struct inode *ip, uint32_t hash, const char *name,
115     struct ext2fs_htree_lookup_info *info)
116 {
117 	struct vnode *vp = ITOV(ip);
118 	struct ext2fs_htree_lookup_level *level;
119 	struct buf *bp;
120 	uint32_t next_hash;
121 	int idx = info->h_levels_num - 1;
122 	int levels = 0;
123 
124 	do {
125 		level = &info->h_levels[idx];
126 		level->h_entry++;
127 		if (level->h_entry < level->h_entries +
128 		    ext2_htree_get_count(level->h_entries))
129 			break;
130 		if (idx == 0)
131 			return (0);
132 		idx--;
133 		levels++;
134 	} while (1);
135 
136 	next_hash = ext2_htree_get_hash(level->h_entry);
137 	if ((hash & 1) == 0) {
138 		if (hash != (next_hash & ~1))
139 			return (0);
140 	}
141 
142 	while (levels > 0) {
143 		levels--;
144 		if (ext2_blkatoff(vp, ext2_htree_get_block(level->h_entry) *
145 		    ip->i_e2fs->e2fs_bsize, NULL, &bp) != 0)
146 			return (0);
147 		level = &info->h_levels[idx + 1];
148 		brelse(level->h_bp);
149 		level->h_bp = bp;
150 		level->h_entry = level->h_entries =
151 		    ((struct ext2fs_htree_node *)bp->b_data)->h_entries;
152 	}
153 
154 	return (1);
155 }
156 
157 static uint32_t
158 ext2_htree_get_block(struct ext2fs_htree_entry *ep)
159 {
160 	return (le32toh(ep->h_blk) & 0x00FFFFFF);
161 }
162 
163 static void
164 ext2_htree_set_block(struct ext2fs_htree_entry *ep, uint32_t blk)
165 {
166 	ep->h_blk = htole32(blk);
167 }
168 
169 static uint16_t
170 ext2_htree_get_count(struct ext2fs_htree_entry *ep)
171 {
172 	return (le16toh(((struct ext2fs_htree_count *)(ep))->h_entries_num));
173 }
174 
175 static void
176 ext2_htree_set_count(struct ext2fs_htree_entry *ep, uint16_t cnt)
177 {
178 	((struct ext2fs_htree_count *)(ep))->h_entries_num = htole16(cnt);
179 }
180 
181 static uint32_t
182 ext2_htree_get_hash(struct ext2fs_htree_entry *ep)
183 {
184 	return (le32toh(ep->h_hash));
185 }
186 
187 static uint16_t
188 ext2_htree_get_limit(struct ext2fs_htree_entry *ep)
189 {
190 	return (le16toh(((struct ext2fs_htree_count *)(ep))->h_entries_max));
191 }
192 
193 static void
194 ext2_htree_set_hash(struct ext2fs_htree_entry *ep, uint32_t hash)
195 {
196 	ep->h_hash = htole32(hash);
197 }
198 
199 static void
200 ext2_htree_set_limit(struct ext2fs_htree_entry *ep, uint16_t limit)
201 {
202 	((struct ext2fs_htree_count *)(ep))->h_entries_max = htole16(limit);
203 }
204 
205 static void
206 ext2_htree_release(struct ext2fs_htree_lookup_info *info)
207 {
208 	u_int i;
209 
210 	for (i = 0; i < info->h_levels_num; i++) {
211 		struct buf *bp = info->h_levels[i].h_bp;
212 
213 		if (bp != NULL)
214 			brelse(bp);
215 	}
216 }
217 
218 static uint32_t
219 ext2_htree_root_limit(struct inode *ip, int len)
220 {
221 	struct m_ext2fs *fs;
222 	uint32_t space;
223 
224 	fs = ip->i_e2fs;
225 	space = ip->i_e2fs->e2fs_bsize - EXT2_DIR_REC_LEN(1) -
226 	    EXT2_DIR_REC_LEN(2) - len;
227 
228 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
229 		space -= sizeof(struct ext2fs_htree_tail);
230 
231 	return (space / sizeof(struct ext2fs_htree_entry));
232 }
233 
234 static uint32_t
235 ext2_htree_node_limit(struct inode *ip)
236 {
237 	struct m_ext2fs *fs;
238 	uint32_t space;
239 
240 	fs = ip->i_e2fs;
241 	space = fs->e2fs_bsize - EXT2_DIR_REC_LEN(0);
242 
243 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
244 		space -= sizeof(struct ext2fs_htree_tail);
245 
246 	return (space / sizeof(struct ext2fs_htree_entry));
247 }
248 
249 static void
250 ext2_get_hash_seed(struct ext2fs* es, uint32_t* seed)
251 {
252 
253 	for (int i = 0; i < 4; i++)
254 		seed[i] = le32toh(es->e3fs_hash_seed[i]);
255 }
256 
257 static int
258 ext2_htree_find_leaf(struct inode *ip, const char *name, int namelen,
259     uint32_t *hash, uint8_t *hash_ver,
260     struct ext2fs_htree_lookup_info *info)
261 {
262 	struct vnode *vp;
263 	struct ext2fs *fs;
264 	struct m_ext2fs *m_fs;
265 	struct buf *bp = NULL;
266 	struct ext2fs_htree_root *rootp;
267 	struct ext2fs_htree_entry *entp, *start, *end, *middle, *found;
268 	struct ext2fs_htree_lookup_level *level_info;
269 	uint32_t hash_major = 0, hash_minor = 0;
270 	uint32_t levels, cnt;
271 	uint32_t hash_seed[4];
272 	uint8_t hash_version;
273 
274 	if (name == NULL || info == NULL)
275 		return (-1);
276 
277 	vp = ITOV(ip);
278 	fs = ip->i_e2fs->e2fs;
279 	m_fs = ip->i_e2fs;
280 
281 	if (ext2_blkatoff(vp, 0, NULL, &bp) != 0)
282 		return (-1);
283 
284 	info->h_levels_num = 1;
285 	info->h_levels[0].h_bp = bp;
286 	rootp = (struct ext2fs_htree_root *)bp->b_data;
287 	if (rootp->h_info.h_hash_version != EXT2_HTREE_LEGACY &&
288 	    rootp->h_info.h_hash_version != EXT2_HTREE_HALF_MD4 &&
289 	    rootp->h_info.h_hash_version != EXT2_HTREE_TEA)
290 		goto error;
291 
292 	hash_version = rootp->h_info.h_hash_version;
293 	if (hash_version <= EXT2_HTREE_TEA)
294 		hash_version += m_fs->e2fs_uhash;
295 	*hash_ver = hash_version;
296 
297 	ext2_get_hash_seed(fs, hash_seed);
298 	ext2_htree_hash(name, namelen, hash_seed,
299 	    hash_version, &hash_major, &hash_minor);
300 	*hash = hash_major;
301 
302 	if ((levels = rootp->h_info.h_ind_levels) > 1)
303 		goto error;
304 
305 	entp = (struct ext2fs_htree_entry *)(((char *)&rootp->h_info) +
306 	    rootp->h_info.h_info_len);
307 
308 	if (ext2_htree_get_limit(entp) !=
309 	    ext2_htree_root_limit(ip, rootp->h_info.h_info_len))
310 		goto error;
311 
312 	while (1) {
313 		cnt = ext2_htree_get_count(entp);
314 		if (cnt == 0 || cnt > ext2_htree_get_limit(entp))
315 			goto error;
316 
317 		start = entp + 1;
318 		end = entp + cnt - 1;
319 		while (start <= end) {
320 			middle = start + (end - start) / 2;
321 			if (ext2_htree_get_hash(middle) > hash_major)
322 				end = middle - 1;
323 			else
324 				start = middle + 1;
325 		}
326 		found = start - 1;
327 
328 		level_info = &(info->h_levels[info->h_levels_num - 1]);
329 		level_info->h_bp = bp;
330 		level_info->h_entries = entp;
331 		level_info->h_entry = found;
332 		if (levels == 0)
333 			return (0);
334 		levels--;
335 		if (ext2_blkatoff(vp,
336 		    ext2_htree_get_block(found) * m_fs->e2fs_bsize,
337 		    NULL, &bp) != 0)
338 			goto error;
339 		entp = ((struct ext2fs_htree_node *)bp->b_data)->h_entries;
340 		info->h_levels_num++;
341 		info->h_levels[info->h_levels_num - 1].h_bp = bp;
342 	}
343 
344 error:
345 	ext2_htree_release(info);
346 	return (-1);
347 }
348 
349 /*
350  * Try to lookup a directory entry in HTree index
351  */
352 int
353 ext2_htree_lookup(struct inode *ip, const char *name, int namelen,
354     struct buf **bpp, int *entryoffp, doff_t *offp,
355     doff_t *prevoffp, doff_t *endusefulp,
356     struct ext2fs_searchslot *ss)
357 {
358 	struct vnode *vp;
359 	struct ext2fs_htree_lookup_info info;
360 	struct ext2fs_htree_entry *leaf_node;
361 	struct m_ext2fs *m_fs;
362 	struct buf *bp;
363 	uint32_t blk;
364 	uint32_t dirhash;
365 	uint32_t bsize;
366 	uint8_t hash_version;
367 	int search_next;
368 	int found = 0;
369 
370 	m_fs = ip->i_e2fs;
371 	bsize = m_fs->e2fs_bsize;
372 	vp = ITOV(ip);
373 
374 	/* TODO: print error msg because we don't lookup '.' and '..' */
375 
376 	memset(&info, 0, sizeof(info));
377 	if (ext2_htree_find_leaf(ip, name, namelen, &dirhash,
378 	    &hash_version, &info))
379 		return (-1);
380 
381 	do {
382 		leaf_node = info.h_levels[info.h_levels_num - 1].h_entry;
383 		blk = ext2_htree_get_block(leaf_node);
384 		if (ext2_blkatoff(vp, blk * bsize, NULL, &bp) != 0) {
385 			ext2_htree_release(&info);
386 			return (-1);
387 		}
388 
389 		*offp = blk * bsize;
390 		*entryoffp = 0;
391 		*prevoffp = blk * bsize;
392 		*endusefulp = blk * bsize;
393 
394 		if (ss->slotstatus == NONE) {
395 			ss->slotoffset = -1;
396 			ss->slotfreespace = 0;
397 		}
398 
399 		if (ext2_search_dirblock(ip, bp->b_data, &found,
400 		    name, namelen, entryoffp, offp, prevoffp,
401 		    endusefulp, ss) != 0) {
402 			brelse(bp);
403 			ext2_htree_release(&info);
404 			return (-1);
405 		}
406 
407 		if (found) {
408 			*bpp = bp;
409 			ext2_htree_release(&info);
410 			return (0);
411 		}
412 
413 		brelse(bp);
414 		search_next = ext2_htree_check_next(ip, dirhash, name, &info);
415 	} while (search_next);
416 
417 	ext2_htree_release(&info);
418 	return (ENOENT);
419 }
420 
421 static int
422 ext2_htree_append_block(struct vnode *vp, char *data,
423     struct componentname *cnp, uint32_t blksize)
424 {
425 	struct iovec aiov;
426 	struct uio auio;
427 	struct inode *dp = VTOI(vp);
428 	uint64_t cursize, newsize;
429 	int error;
430 
431 	cursize = roundup(dp->i_size, blksize);
432 	newsize = cursize + blksize;
433 
434 	auio.uio_offset = cursize;
435 	auio.uio_resid = blksize;
436 	aiov.iov_len = blksize;
437 	aiov.iov_base = data;
438 	auio.uio_iov = &aiov;
439 	auio.uio_iovcnt = 1;
440 	auio.uio_rw = UIO_WRITE;
441 	auio.uio_segflg = UIO_SYSSPACE;
442 	auio.uio_td = NULL;
443 	error = VOP_WRITE(vp, &auio, IO_SYNC, cnp->cn_cred);
444 	if (!error)
445 		dp->i_size = newsize;
446 
447 	return (error);
448 }
449 
450 static int
451 ext2_htree_writebuf(struct inode* ip, struct ext2fs_htree_lookup_info *info)
452 {
453 	int i, error;
454 
455 	for (i = 0; i < info->h_levels_num; i++) {
456 		struct buf *bp = info->h_levels[i].h_bp;
457 		ext2_dx_csum_set(ip, (struct ext2fs_direct_2 *)bp->b_data);
458 		error = bwrite(bp);
459 		if (error)
460 			return (error);
461 	}
462 
463 	return (0);
464 }
465 
466 static void
467 ext2_htree_insert_entry_to_level(struct ext2fs_htree_lookup_level *level,
468     uint32_t hash, uint32_t blk)
469 {
470 	struct ext2fs_htree_entry *target;
471 	int entries_num;
472 
473 	target = level->h_entry + 1;
474 	entries_num = ext2_htree_get_count(level->h_entries);
475 
476 	memmove(target + 1, target, (char *)(level->h_entries + entries_num) -
477 	    (char *)target);
478 	ext2_htree_set_block(target, blk);
479 	ext2_htree_set_hash(target, hash);
480 	ext2_htree_set_count(level->h_entries, entries_num + 1);
481 }
482 
483 /*
484  * Insert an index entry to the index node.
485  */
486 static void
487 ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info,
488     uint32_t hash, uint32_t blk)
489 {
490 	struct ext2fs_htree_lookup_level *level;
491 
492 	level = &info->h_levels[info->h_levels_num - 1];
493 	ext2_htree_insert_entry_to_level(level, hash, blk);
494 }
495 
496 /*
497  * Compare two entry sort descriptors by name hash value.
498  * This is used together with qsort.
499  */
500 static int
501 ext2_htree_cmp_sort_entry(const void *e1, const void *e2)
502 {
503 	const struct ext2fs_htree_sort_entry *entry1, *entry2;
504 
505 	entry1 = (const struct ext2fs_htree_sort_entry *)e1;
506 	entry2 = (const struct ext2fs_htree_sort_entry *)e2;
507 
508 	if (le32toh(entry1->h_hash) < le32toh(entry2->h_hash))
509 		return (-1);
510 	if (le32toh(entry1->h_hash) > le32toh(entry2->h_hash))
511 		return (1);
512 	return (0);
513 }
514 
515 /*
516  * Append an entry to the end of the directory block.
517  */
518 static void
519 ext2_append_entry(char *block, uint32_t blksize,
520     struct ext2fs_direct_2 *last_entry,
521     struct ext2fs_direct_2 *new_entry, int csum_size)
522 {
523 	uint16_t entry_len;
524 
525 	entry_len = EXT2_DIR_REC_LEN(last_entry->e2d_namlen);
526 	last_entry->e2d_reclen = htole16(entry_len);
527 	last_entry = (struct ext2fs_direct_2 *)((char *)last_entry + entry_len);
528 	new_entry->e2d_reclen = htole16(block + blksize - (char *)last_entry -
529 	    csum_size);
530 	memcpy(last_entry, new_entry, EXT2_DIR_REC_LEN(new_entry->e2d_namlen));
531 }
532 
533 /*
534  * Move half of entries from the old directory block to the new one.
535  */
536 static int
537 ext2_htree_split_dirblock(struct inode *ip, char *block1, char *block2,
538     uint32_t blksize, uint32_t *hash_seed, uint8_t hash_version,
539     uint32_t *split_hash, struct ext2fs_direct_2 *entry)
540 {
541 	struct m_ext2fs *fs;
542 	int entry_cnt = 0;
543 	int size = 0, csum_size = 0;
544 	int i, k;
545 	uint32_t offset;
546 	uint16_t entry_len = 0;
547 	uint32_t entry_hash;
548 	struct ext2fs_direct_2 *ep, *last;
549 	char *dest;
550 	struct ext2fs_htree_sort_entry *sort_info;
551 
552 	fs = ip->i_e2fs;
553 	ep = (struct ext2fs_direct_2 *)block1;
554 	dest = block2;
555 	sort_info = (struct ext2fs_htree_sort_entry *)
556 	    ((char *)block2 + blksize);
557 
558 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
559 		csum_size = sizeof(struct ext2fs_direct_tail);
560 
561 	/*
562 	 * Calculate name hash value for the entry which is to be added.
563 	 */
564 	ext2_htree_hash(entry->e2d_name, entry->e2d_namlen, hash_seed,
565 	    hash_version, &entry_hash, NULL);
566 
567 	/*
568 	 * Fill in directory entry sort descriptors.
569 	 */
570 	while ((char *)ep < block1 + blksize - csum_size) {
571 		if (le32toh(ep->e2d_ino) && ep->e2d_namlen) {
572 			entry_cnt++;
573 			sort_info--;
574 			sort_info->h_size = ep->e2d_reclen;
575 			sort_info->h_offset = htole16((char *)ep - block1);
576 			ext2_htree_hash(ep->e2d_name, ep->e2d_namlen,
577 			    hash_seed, hash_version,
578 			    &sort_info->h_hash, NULL);
579 			sort_info->h_hash = htole32(sort_info->h_hash);
580 		}
581 		ep = (struct ext2fs_direct_2 *)
582 		    ((char *)ep + le16toh(ep->e2d_reclen));
583 	}
584 
585 	/*
586 	 * Sort directory entry descriptors by name hash value.
587 	 */
588 	qsort(sort_info, entry_cnt, sizeof(struct ext2fs_htree_sort_entry),
589 	    ext2_htree_cmp_sort_entry);
590 
591 	/*
592 	 * Count the number of entries to move to directory block 2.
593 	 */
594 	for (i = entry_cnt - 1; i >= 0; i--) {
595 		if (le16toh(sort_info[i].h_size) + size > blksize / 2)
596 			break;
597 		size += le16toh(sort_info[i].h_size);
598 	}
599 
600 	*split_hash = le32toh(sort_info[i + 1].h_hash);
601 
602 	/*
603 	 * Set collision bit.
604 	 */
605 	if (*split_hash == le32toh(sort_info[i].h_hash))
606 		*split_hash += 1;
607 
608 	/*
609 	 * Move half of directory entries from block 1 to block 2.
610 	 */
611 	for (k = i + 1; k < entry_cnt; k++) {
612 		ep = (struct ext2fs_direct_2 *)((char *)block1 +
613 		    le16toh(sort_info[k].h_offset));
614 		entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
615 		memcpy(dest, ep, entry_len);
616 		((struct ext2fs_direct_2 *)dest)->e2d_reclen =
617 		    htole16(entry_len);
618 		/* Mark directory entry as unused. */
619 		ep->e2d_ino = 0;
620 		dest += entry_len;
621 	}
622 	dest -= entry_len;
623 
624 	/* Shrink directory entries in block 1. */
625 	last = (struct ext2fs_direct_2 *)block1;
626 	entry_len = 0;
627 	for (offset = 0; offset < blksize - csum_size; ) {
628 		ep = (struct ext2fs_direct_2 *)(block1 + offset);
629 		offset += le16toh(ep->e2d_reclen);
630 		if (le32toh(ep->e2d_ino)) {
631 			last = (struct ext2fs_direct_2 *)
632 			    ((char *)last + entry_len);
633 			entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
634 			memcpy((void *)last, (void *)ep, entry_len);
635 			last->e2d_reclen = htole16(entry_len);
636 		}
637 	}
638 
639 	if (entry_hash >= *split_hash) {
640 		/* Add entry to block 2. */
641 		ext2_append_entry(block2, blksize,
642 		    (struct ext2fs_direct_2 *)dest, entry, csum_size);
643 
644 		/* Adjust length field of last entry of block 1. */
645 		last->e2d_reclen = htole16(block1 + blksize - (char *)last -
646 		    csum_size);
647 	} else {
648 		/* Add entry to block 1. */
649 		ext2_append_entry(block1, blksize, last, entry, csum_size);
650 
651 		/* Adjust length field of last entry of block 2. */
652 		((struct ext2fs_direct_2 *)dest)->e2d_reclen =
653 		    htole16(block2 + blksize - dest - csum_size);
654 	}
655 
656 	if (csum_size) {
657 		ext2_init_dirent_tail(EXT2_DIRENT_TAIL(block1, blksize));
658 		ext2_init_dirent_tail(EXT2_DIRENT_TAIL(block2, blksize));
659 	}
660 
661 	return (0);
662 }
663 
664 /*
665  * Create an HTree index for a directory
666  */
667 int
668 ext2_htree_create_index(struct vnode *vp, struct componentname *cnp,
669     struct ext2fs_direct_2 *new_entry)
670 {
671 	struct buf *bp = NULL;
672 	struct inode *dp;
673 	struct ext2fs *fs;
674 	struct m_ext2fs *m_fs;
675 	struct ext2fs_direct_2 *ep, *dotdot;
676 	struct ext2fs_htree_root *root;
677 	struct ext2fs_htree_lookup_info info;
678 	uint32_t blksize, dirlen, split_hash;
679 	uint32_t hash_seed[4];
680 	uint8_t hash_version;
681 	char *buf1 = NULL;
682 	char *buf2 = NULL;
683 	int error = 0;
684 
685 	dp = VTOI(vp);
686 	fs = dp->i_e2fs->e2fs;
687 	m_fs = dp->i_e2fs;
688 	blksize = m_fs->e2fs_bsize;
689 
690 	buf1 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
691 	buf2 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
692 
693 	if ((error = ext2_blkatoff(vp, 0, NULL, &bp)) != 0)
694 		goto out;
695 
696 	root = (struct ext2fs_htree_root *)bp->b_data;
697 	dotdot = (struct ext2fs_direct_2 *)((char *)&(root->h_dotdot));
698 	ep = (struct ext2fs_direct_2 *)((char *)dotdot +
699 	    le16toh(dotdot->e2d_reclen));
700 	dirlen = (char *)root + blksize - (char *)ep;
701 	memcpy(buf1, ep, dirlen);
702 	ep = (struct ext2fs_direct_2 *)buf1;
703 	while ((char *)ep < buf1 + dirlen)
704 		ep = (struct ext2fs_direct_2 *)
705 		    ((char *)ep + le16toh(ep->e2d_reclen));
706 	ep->e2d_reclen = htole16(buf1 + blksize - (char *)ep);
707 
708 	dp->i_flag |= IN_E3INDEX;
709 
710 	/*
711 	 * Initialize index root.
712 	 */
713 	dotdot->e2d_reclen = htole16(blksize - EXT2_DIR_REC_LEN(1));
714 	memset(&root->h_info, 0, sizeof(root->h_info));
715 	root->h_info.h_hash_version = fs->e3fs_def_hash_version;
716 	root->h_info.h_info_len = sizeof(root->h_info);
717 	ext2_htree_set_block(root->h_entries, 1);
718 	ext2_htree_set_count(root->h_entries, 1);
719 	ext2_htree_set_limit(root->h_entries,
720 	    ext2_htree_root_limit(dp, sizeof(root->h_info)));
721 
722 	memset(&info, 0, sizeof(info));
723 	info.h_levels_num = 1;
724 	info.h_levels[0].h_entries = root->h_entries;
725 	info.h_levels[0].h_entry = root->h_entries;
726 
727 	hash_version = root->h_info.h_hash_version;
728 	if (hash_version <= EXT2_HTREE_TEA)
729 		hash_version += m_fs->e2fs_uhash;
730 	ext2_get_hash_seed(fs, hash_seed);
731 	ext2_htree_split_dirblock(dp, buf1, buf2, blksize, hash_seed,
732 	    hash_version, &split_hash, new_entry);
733 	ext2_htree_insert_entry(&info, split_hash, 2);
734 
735 	/*
736 	 * Write directory block 0.
737 	 */
738 	ext2_dx_csum_set(dp, (struct ext2fs_direct_2 *)bp->b_data);
739 	if (DOINGASYNC(vp)) {
740 		bdwrite(bp);
741 		error = 0;
742 	} else {
743 		error = bwrite(bp);
744 	}
745 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
746 	if (error)
747 		goto out;
748 
749 	/*
750 	 * Write directory block 1.
751 	 */
752 	ext2_dirent_csum_set(dp, (struct ext2fs_direct_2 *)buf1);
753 	error = ext2_htree_append_block(vp, buf1, cnp, blksize);
754 	if (error)
755 		goto out1;
756 
757 	/*
758 	 * Write directory block 2.
759 	 */
760 	ext2_dirent_csum_set(dp, (struct ext2fs_direct_2 *)buf2);
761 	error = ext2_htree_append_block(vp, buf2, cnp, blksize);
762 
763 	free(buf1, M_TEMP);
764 	free(buf2, M_TEMP);
765 	return (error);
766 out:
767 	if (bp != NULL)
768 		brelse(bp);
769 out1:
770 	free(buf1, M_TEMP);
771 	free(buf2, M_TEMP);
772 	return (error);
773 }
774 
775 /*
776  * Add an entry to the directory using htree index.
777  */
778 int
779 ext2_htree_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry,
780     struct componentname *cnp)
781 {
782 	struct ext2fs_htree_entry *entries, *leaf_node;
783 	struct ext2fs_htree_lookup_info info;
784 	struct buf *bp = NULL;
785 	struct ext2fs *fs;
786 	struct m_ext2fs *m_fs;
787 	struct inode *ip;
788 	uint16_t ent_num;
789 	uint32_t dirhash, split_hash;
790 	uint32_t blksize, blknum;
791 	uint64_t cursize, dirsize;
792 	uint32_t hash_seed[4];
793 	uint8_t hash_version;
794 	char *newdirblock = NULL;
795 	char *newidxblock = NULL;
796 	struct ext2fs_htree_node *dst_node;
797 	struct ext2fs_htree_entry *dst_entries;
798 	struct ext2fs_htree_entry *root_entires;
799 	struct buf *dst_bp = NULL;
800 	int error, write_bp = 0, write_dst_bp = 0, write_info = 0;
801 
802 	ip = VTOI(dvp);
803 	m_fs = ip->i_e2fs;
804 	fs = m_fs->e2fs;
805 	blksize = m_fs->e2fs_bsize;
806 
807 	if (ip->i_count != 0)
808 		return ext2_add_entry(dvp, entry);
809 
810 	/* Target directory block is full, split it */
811 	memset(&info, 0, sizeof(info));
812 	error = ext2_htree_find_leaf(ip, entry->e2d_name, entry->e2d_namlen,
813 	    &dirhash, &hash_version, &info);
814 	if (error)
815 		return (error);
816 
817 	entries = info.h_levels[info.h_levels_num - 1].h_entries;
818 	ent_num = ext2_htree_get_count(entries);
819 	if (ent_num == ext2_htree_get_limit(entries)) {
820 		/* Split the index node. */
821 		root_entires = info.h_levels[0].h_entries;
822 		newidxblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
823 		dst_node = (struct ext2fs_htree_node *)newidxblock;
824 		memset(&dst_node->h_fake_dirent, 0,
825 		    sizeof(dst_node->h_fake_dirent));
826 		dst_node->h_fake_dirent.e2d_reclen = htole16(blksize);
827 
828 		cursize = roundup(ip->i_size, blksize);
829 		dirsize = cursize + blksize;
830 		blknum = dirsize / blksize - 1;
831 		ext2_dx_csum_set(ip, (struct ext2fs_direct_2 *)newidxblock);
832 		error = ext2_htree_append_block(dvp, newidxblock,
833 		    cnp, blksize);
834 		if (error)
835 			goto finish;
836 		error = ext2_blkatoff(dvp, cursize, NULL, &dst_bp);
837 		if (error)
838 			goto finish;
839 		dst_node = (struct ext2fs_htree_node *)dst_bp->b_data;
840 		dst_entries = dst_node->h_entries;
841 
842 		if (info.h_levels_num == 2) {
843 			uint16_t src_ent_num, dst_ent_num;
844 
845 			if (ext2_htree_get_count(root_entires) ==
846 			    ext2_htree_get_limit(root_entires)) {
847 				SDT_PROBE2(ext2fs, , trace, htree, 1,
848 				    "directory index is full");
849 				error = EIO;
850 				goto finish;
851 			}
852 
853 			src_ent_num = ent_num / 2;
854 			dst_ent_num = ent_num - src_ent_num;
855 			split_hash = ext2_htree_get_hash(entries + src_ent_num);
856 
857 			/* Move half of index entries to the new index node */
858 			memcpy(dst_entries, entries + src_ent_num,
859 			    dst_ent_num * sizeof(struct ext2fs_htree_entry));
860 			ext2_htree_set_count(entries, src_ent_num);
861 			ext2_htree_set_count(dst_entries, dst_ent_num);
862 			ext2_htree_set_limit(dst_entries,
863 			    ext2_htree_node_limit(ip));
864 
865 			if (info.h_levels[1].h_entry >= entries + src_ent_num) {
866 				struct buf *tmp = info.h_levels[1].h_bp;
867 
868 				info.h_levels[1].h_bp = dst_bp;
869 				dst_bp = tmp;
870 
871 				info.h_levels[1].h_entry =
872 				    info.h_levels[1].h_entry -
873 				    (entries + src_ent_num) +
874 				    dst_entries;
875 				info.h_levels[1].h_entries = dst_entries;
876 			}
877 			ext2_htree_insert_entry_to_level(&info.h_levels[0],
878 			    split_hash, blknum);
879 
880 			/* Write new index node to disk */
881 			ext2_dx_csum_set(ip,
882 			    (struct ext2fs_direct_2 *)dst_bp->b_data);
883 			error = bwrite(dst_bp);
884 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
885 			if (error)
886 				goto finish;
887 			write_dst_bp = 1;
888 		} else {
889 			/* Create second level for htree index */
890 			struct ext2fs_htree_root *idx_root;
891 
892 			memcpy(dst_entries, entries,
893 			    ent_num * sizeof(struct ext2fs_htree_entry));
894 			ext2_htree_set_limit(dst_entries,
895 			    ext2_htree_node_limit(ip));
896 
897 			idx_root = (struct ext2fs_htree_root *)
898 			    info.h_levels[0].h_bp->b_data;
899 			idx_root->h_info.h_ind_levels = 1;
900 
901 			ext2_htree_set_count(entries, 1);
902 			ext2_htree_set_block(entries, blknum);
903 
904 			info.h_levels_num = 2;
905 			info.h_levels[1].h_entries = dst_entries;
906 			info.h_levels[1].h_entry = info.h_levels[0].h_entry -
907 			    info.h_levels[0].h_entries + dst_entries;
908 			info.h_levels[1].h_bp = dst_bp;
909 			dst_bp = NULL;
910 		}
911 	}
912 
913 	leaf_node = info.h_levels[info.h_levels_num - 1].h_entry;
914 	blknum = ext2_htree_get_block(leaf_node);
915 	error = ext2_blkatoff(dvp, blknum * blksize, NULL, &bp);
916 	if (error)
917 		goto finish;
918 
919 	/* Split target directory block */
920 	newdirblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
921 	ext2_get_hash_seed(fs, hash_seed);
922 	ext2_htree_split_dirblock(ip, (char *)bp->b_data, newdirblock, blksize,
923 	    hash_seed, hash_version, &split_hash, entry);
924 	cursize = roundup(ip->i_size, blksize);
925 	dirsize = cursize + blksize;
926 	blknum = dirsize / blksize - 1;
927 
928 	/* Add index entry for the new directory block */
929 	ext2_htree_insert_entry(&info, split_hash, blknum);
930 
931 	/* Write the new directory block to the end of the directory */
932 	ext2_dirent_csum_set(ip, (struct ext2fs_direct_2 *)newdirblock);
933 	error = ext2_htree_append_block(dvp, newdirblock, cnp, blksize);
934 	if (error)
935 		goto finish;
936 
937 	/* Write the target directory block */
938 	ext2_dirent_csum_set(ip, (struct ext2fs_direct_2 *)bp->b_data);
939 	error = bwrite(bp);
940 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
941 	if (error)
942 		goto finish;
943 	write_bp = 1;
944 
945 	/* Write the index block */
946 	error = ext2_htree_writebuf(ip, &info);
947 	if (!error)
948 		write_info = 1;
949 
950 finish:
951 	if (dst_bp != NULL && !write_dst_bp)
952 		brelse(dst_bp);
953 	if (bp != NULL && !write_bp)
954 		brelse(bp);
955 	if (newdirblock != NULL)
956 		free(newdirblock, M_TEMP);
957 	if (newidxblock != NULL)
958 		free(newidxblock, M_TEMP);
959 	if (!write_info)
960 		ext2_htree_release(&info);
961 	return (error);
962 }
963