xref: /freebsd/stand/libsa/ext2fs.c (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 /*-
2  * Copyright (c) 1999,2000 Jonathan Lemon <jlemon@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 /*-
31  * Copyright (c) 1993
32  *	The Regents of the University of California.  All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * The Mach Operating System project at Carnegie-Mellon University.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *
66  * Copyright (c) 1990, 1991 Carnegie Mellon University
67  * All Rights Reserved.
68  *
69  * Author: David Golub
70  *
71  * Permission to use, copy, modify and distribute this software and its
72  * documentation is hereby granted, provided that both the copyright
73  * notice and this permission notice appear in all copies of the
74  * software, derivative works or modified versions, and any portions
75  * thereof, and that both notices appear in supporting documentation.
76  *
77  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
78  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
79  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
80  *
81  * Carnegie Mellon requests users of this software to return to
82  *
83  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
84  *  School of Computer Science
85  *  Carnegie Mellon University
86  *  Pittsburgh PA 15213-3890
87  *
88  * any improvements or extensions that they make and grant Carnegie the
89  * rights to redistribute these changes.
90  */
91 
92 #include <sys/param.h>
93 #include <sys/time.h>
94 #include "stand.h"
95 #include "string.h"
96 
/*
 * Entry points of the ext2fs reader.  Each of these is installed in the
 * ext2fs_fsops table defined further down in this file.
 */
97 static int	ext2fs_open(const char *path, struct open_file *f);
98 static int	ext2fs_close(struct open_file *f);
99 static int	ext2fs_read(struct open_file *f, void *buf,
100 			 size_t size, size_t *resid);
101 static off_t	ext2fs_seek(struct open_file *f, off_t offset, int where);
102 static int	ext2fs_stat(struct open_file *f, struct stat *sb);
103 static int	ext2fs_readdir(struct open_file *f, struct dirent *d);
104 
/*
 * Translation table from the file-type code stored in an ext2 directory
 * entry to the matching DT_* value; the on-disk code is the index.
 */
static const int dtmap[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR,
			 DT_BLK, DT_FIFO, DT_SOCK, DT_LNK };
/*
 * Map an on-disk file-type code to a DT_* value; any code that does not
 * index a slot of dtmap[] yields DT_UNKNOWN.
 *
 * The bound is ">=" because an index equal to the element count is already
 * one past the end of the table.  The expansion is fully parenthesized so
 * the macro can be used inside a larger expression.  x is evaluated twice.
 */
#define EXTFTODT(x)	((x) >= sizeof(dtmap) / sizeof(dtmap[0]) ? \
			DT_UNKNOWN : dtmap[x])
109 
/*
 * Operations table exported by this file system.  The initializer is
 * positional: a name string followed by this module's handlers, listed
 * here in the order open, close, read, write, seek, stat, readdir.  The
 * field names live in the declaration of struct fs_ops, which is not part
 * of this file.  The write slot holds null_write, which is defined
 * elsewhere; this module has no write routine of its own.
 */
110 struct fs_ops ext2fs_fsops = {
111 	"ext2fs",
112 	ext2fs_open,
113 	ext2fs_close,
114 	ext2fs_read,
115 	null_write,
116 	ext2fs_seek,
117 	ext2fs_stat,
118 	ext2fs_readdir
119 };
120 
/*
 * Fixed parameters of the on-disk format as used by this reader.
 */
121 #define	EXT2_SBSIZE	1024	/* bytes read for the superblock */
122 #define	EXT2_SBLOCK	(1024 / DEV_BSIZE)	/* offset of superblock, in DEV_BSIZE units */
123 #define EXT2_MAGIC	0xef53	/* expected value of fd_magic */
124 #define EXT2_ROOTINO	2	/* inode at which path lookup starts */
125 
126 #define EXT2_REV0		0	/* original revision of ext2 */
127 #define EXT2_R0_ISIZE		128	/* inode size */
128 #define EXT2_R0_FIRSTINO	11	/* first inode */
129 
130 #define EXT2_MINBSHIFT		10	/* minimum block shift */
131 #define EXT2_MINFSHIFT		10	/* minimum frag shift */
132 
133 #define EXT2_NDADDR		12	/* # of direct blocks */
134 #define EXT2_NIADDR		3	/* # of indirect blocks */
135 
136 /*
137  * file system block to disk address
138  */
/*
 * The conversion is a left shift by fs_fsbtodb, which ext2fs_open() derives
 * from the block size.  The shift is carried out in the type of blk, so the
 * caller is responsible for passing a type wide enough for the result.
 */
139 #define fsb_to_db(fs, blk)	((blk) << (fs)->fs_fsbtodb)
140 
141 /*
142  * inode to block group offset
143  * inode to block group
144  * inode to disk address
145  * inode to block offset
146  */
/*
 * ino_to_bgo: index of the inode within its block group.
 * ino_to_bg:  index of the block group holding the inode.
 * ino_to_db:  device address of the inode-table block holding the inode;
 *             bg is the array of group descriptors.
 * ino_to_bo:  index of the inode within that inode-table block, counted in
 *             inodes (not bytes).
 *
 * All four subtract 1 from ino first, so an ino of 0 is not a valid
 * argument.
 */
147 #define ino_to_bgo(fs, ino)	(((ino) - 1) % (fs)->fs_ipg)
148 #define ino_to_bg(fs, ino)	(((ino) - 1) / (fs)->fs_ipg)
149 #define ino_to_db(fs, bg, ino) \
150 	fsb_to_db(fs, ((bg)[ino_to_bg(fs, ino)].bg_inotbl + \
151 	    ino_to_bgo(fs, ino) / (fs)->fs_ipb))
152 #define ino_to_bo(fs, ino)	(ino_to_bgo(fs, ino) % (fs)->fs_ipb)
153 
/*
 * Block arithmetic helpers.  fs_bmask and fs_fmask are "size - 1" masks
 * (see ext2fs_open()), so masking with them yields a remainder.
 *
 * nindir:          number of 32-bit block pointers in one block.
 * dblksize:        number of bytes to read for logical block lbn of inode
 *                  dip: a whole block, except for a final direct block that
 *                  the file only partly covers, which is rounded up to a
 *                  fragment multiple.
 */
154 #define nindir(fs) \
155 	((fs)->fs_bsize / sizeof(uint32_t))
156 #define lblkno(fs, loc)				/* loc / bsize */ \
157 	((loc) >> (fs)->fs_bshift)
158 #define smalllblktosize(fs, blk)		/* blk * bsize */ \
159 	((blk) << (fs)->fs_bshift)
160 #define blkoff(fs, loc)				/* loc % bsize */ \
161 	((loc) & (fs)->fs_bmask)
162 #define fragroundup(fs, size)			/* roundup(size, fsize) */ \
163 	(((size) + (fs)->fs_fmask) & ~(fs)->fs_fmask)
164 #define dblksize(fs, dip, lbn) \
165 	(((lbn) >= EXT2_NDADDR || (dip)->di_size >= smalllblktosize(fs, (lbn) + 1)) \
166 	    ? (fs)->fs_bsize \
167 	    : (fragroundup(fs, blkoff(fs, (dip)->di_size))))
168 
169 /*
170  * superblock describing ext2fs
171  *
 * This is the on-disk part of the superblock.  ext2fs_open() reads
 * EXT2_SBSIZE bytes from the device straight into a struct ext2fs, whose
 * first member is this structure, so the order and width of the fields
 * must not be changed.
  */
172 struct ext2fs_disk {
173 	uint32_t	fd_inodes;	/* # of inodes */
174 	uint32_t	fd_blocks;	/* # of blocks */
175 	uint32_t	fd_resblk;	/* # of reserved blocks */
176 	uint32_t	fd_freeblk;	/* # of free blocks */
177 	uint32_t	fd_freeino;	/* # of free inodes */
178 	uint32_t	fd_firstblk;	/* first data block */
179 	uint32_t	fd_bsize;	/* block size, as a shift added to EXT2_MINBSHIFT */
180 	uint32_t	fd_fsize;	/* frag size, as a shift added to EXT2_MINFSHIFT */
181 	uint32_t	fd_bpg;		/* blocks per group */
182 	uint32_t	fd_fpg;		/* frags per group */
183 	uint32_t	fd_ipg;		/* inodes per group */
184 	uint32_t	fd_mtime;	/* mount time */
185 	uint32_t	fd_wtime;	/* write time */
186 	uint16_t	fd_mount;	/* # of mounts */
187 	int16_t		fd_maxmount;	/* max # of mounts */
188 	uint16_t	fd_magic;	/* magic number */
189 	uint16_t	fd_state;	/* state */
190 	uint16_t	fd_eflag;	/* error flags */
191 	uint16_t	fd_mnrrev;	/* minor revision */
192 	uint32_t	fd_lastchk;	/* last check */
193 	uint32_t	fd_chkintvl;	/* maximum check interval */
194 	uint32_t	fd_os;		/* os */
195 	uint32_t	fd_revision;	/* revision */
196 	uint16_t	fd_uid;		/* uid for reserved blocks */
197 	uint16_t	fd_gid;		/* gid for reserved blocks */
198 
	/* the fields below are only consulted when fd_revision != EXT2_REV0 */
199 	uint32_t	fd_firstino;	/* first non-reserved inode */
200 	uint16_t	fd_isize;	/* inode size */
201 	uint16_t	fd_nblkgrp;	/* block group # of superblock */
202 	uint32_t	fd_fcompat;	/* compatible features */
203 	uint32_t	fd_fincompat;	/* incompatible features */
204 	uint32_t	fd_frocompat;	/* read-only compatibilities */
205 	uint8_t		fd_uuid[16];	/* volume uuid */
206 	char 		fd_volname[16];	/* volume name */
207 	char 		fd_fsmnt[64];	/* name last mounted on */
208 	uint32_t	fd_bitmap;	/* compression bitmap */
209 
210 	uint8_t		fd_nblkpa;	/* # of blocks to preallocate */
211 	uint8_t		fd_ndblkpa;	/* # of dir blocks to preallocate */
212 };
213 
/*
 * Values derived from the on-disk superblock.  None of these are read from
 * the device; ext2fs_open() computes all of them after the superblock has
 * been loaded.
 */
214 struct ext2fs_core {
215 	int		fc_bsize;	/* block size */
216 	int		fc_bshift;	/* block shift amount */
217 	int		fc_bmask;	/* block mask (fc_bsize - 1) */
218 	int		fc_fsize;	/* frag size */
219 	int		fc_fshift;	/* frag shift amount */
220 	int		fc_fmask;	/* frag mask (fc_fsize - 1) */
221 	int		fc_isize;	/* inode size */
222 	int		fc_imask;	/* inode mask (fc_isize - 1) */
223 	int		fc_firstino;	/* first non-reserved inode */
224 	int		fc_ipb;		/* inodes per block */
225 	int		fc_fsbtodb;	/* fsb to db shift, used by fsb_to_db() */
226 };
227 
/*
 * In-memory superblock.  The on-disk part is padded out to EXT2_SBSIZE so
 * that the EXT2_SBSIZE-byte read issued by ext2fs_open() lands entirely
 * inside fs_fd and fs_pad; the computed values follow in fs_fc.
 *
 * The fs_* macros are shorthands that let the rest of the file address
 * members of either sub-structure directly through a struct ext2fs.
 */
228 struct ext2fs {
229 	struct		ext2fs_disk fs_fd;
230 	char		fs_pad[EXT2_SBSIZE - sizeof(struct ext2fs_disk)];
231 	struct		ext2fs_core fs_fc;
232 
	/* shorthands for on-disk fields */
233 #define fs_magic	fs_fd.fd_magic
234 #define fs_revision	fs_fd.fd_revision
235 #define fs_blocks	fs_fd.fd_blocks
236 #define fs_firstblk	fs_fd.fd_firstblk
237 #define fs_bpg		fs_fd.fd_bpg
238 #define fs_ipg		fs_fd.fd_ipg
239 
	/* shorthands for computed fields */
240 #define fs_bsize	fs_fc.fc_bsize
241 #define fs_bshift	fs_fc.fc_bshift
242 #define fs_bmask	fs_fc.fc_bmask
243 #define fs_fsize	fs_fc.fc_fsize
244 #define fs_fshift	fs_fc.fc_fshift
245 #define fs_fmask	fs_fc.fc_fmask
246 #define fs_isize	fs_fc.fc_isize
247 #define fs_imask	fs_fc.fc_imask
248 #define fs_firstino	fs_fc.fc_firstino
249 #define fs_ipb		fs_fc.fc_ipb
250 #define fs_fsbtodb	fs_fc.fc_fsbtodb
251 };
252 
/*
 * Block group descriptor.  ext2fs_open() reads the descriptor table from
 * the device directly into an array of these, so the layout mirrors the
 * on-disk format.  Only bg_inotbl is used by this file (in ino_to_db()).
 */
253 struct ext2blkgrp {
254 	uint32_t	bg_blkmap;	/* block bitmap */
255 	uint32_t	bg_inomap;	/* inode bitmap */
256 	uint32_t	bg_inotbl;	/* inode table (first block) */
257 	uint16_t	bg_nfblk;	/* # of free blocks */
258 	uint16_t	bg_nfino;	/* # of free inodes */
259 	uint16_t	bg_ndirs;	/* # of dirs */
260 	char		bg_pad[14];
261 };
262 
/*
 * Inode as stored on disk.  read_inode() copies one of these out of an
 * inode-table block into the per-file state, so the layout mirrors the
 * on-disk format and must not be reordered.
 */
263 struct ext2dinode {
264 	uint16_t	di_mode;	/* mode */
265 	uint16_t	di_uid;		/* uid */
266 	uint32_t	di_size;	/* byte size */
267 	uint32_t	di_atime;	/* access time */
268 	uint32_t	di_ctime;	/* creation time */
269 	uint32_t	di_mtime;	/* modification time */
270 	uint32_t	di_dtime;	/* deletion time */
271 	uint16_t	di_gid;		/* gid */
272 	uint16_t	di_nlink;	/* link count */
273 	uint32_t	di_nblk;	/* block count */
274 	uint32_t	di_flags;	/* file flags */
275 
276 	uint32_t	di_osdep1;	/* os dependent stuff */
277 
278 	uint32_t	di_db[EXT2_NDADDR]; /* direct blocks */
279 	uint32_t	di_ib[EXT2_NIADDR]; /* indirect blocks */
280 	uint32_t	di_version;	/* version */
281 	uint32_t	di_facl;	/* file acl */
282 	uint32_t	di_dacl;	/* dir acl */
283 	uint32_t	di_faddr;	/* fragment addr */
284 
285 	uint8_t		di_frag;	/* fragment number */
286 	uint8_t		di_fsize;	/* fragment size */
287 
288 	char		di_pad[10];
289 
	/*
	 * For a symbolic link with di_nblk == 0, ext2fs_open() takes the
	 * link target from the block-pointer area instead of a data block.
	 */
290 #define di_shortlink	di_db
291 };
292 
/* longest path component accepted by ext2fs_open() and size of d_name[] */
293 #define EXT2_MAXNAMLEN       255
294 
/*
 * Directory entry as found in a directory's data blocks.  Entries are
 * variable length: d_reclen is the distance in bytes to the next entry and
 * d_namlen the number of valid bytes in d_name, which is not NUL
 * terminated.  An entry with d_ino == 0 is skipped by the readers in this
 * file.
 */
295 struct ext2dirent {
296 	uint32_t	d_ino;		/* inode */
297 	uint16_t	d_reclen;	/* directory entry length */
298 	uint8_t		d_namlen;	/* name length */
299 	uint8_t		d_type;		/* file type (index into dtmap[]) */
300 	char		d_name[EXT2_MAXNAMLEN];
301 };
302 
/*
 * Per-open-file state, allocated by ext2fs_open() and stored in
 * f->f_fsdata.  It owns the superblock copy, the group descriptor table,
 * one cached block per indirection level and one cached data block.
 * read_inode() sets the cached block numbers to -1 to mark the caches
 * invalid.
 */
303 struct file {
304 	off_t		f_seekp;		/* seek pointer */
305 	struct 		ext2fs *f_fs;		/* pointer to super-block */
306 	struct 		ext2blkgrp *f_bg;	/* pointer to blkgrp map */
307 	struct 		ext2dinode f_di;	/* copy of on-disk inode */
308 	int		f_nindir[EXT2_NIADDR];	/* number of blocks mapped by
309 						   indirect block at level i */
310 	char		*f_blk[EXT2_NIADDR];	/* buffer for indirect block
311 						   at level i */
312 	size_t		f_blksize[EXT2_NIADDR];	/* size of buffer */
313 	daddr_t		f_blkno[EXT2_NIADDR];	/* disk address of block in
314 						   buffer */
315 	char		*f_buf;			/* buffer for data block */
316 	size_t		f_buf_size;		/* size of data block */
317 	daddr_t		f_buf_blkno;		/* block number of data block */
318 };
319 
320 /* forward decls */
/*
 * Internal helpers shared by the entry points: load an inode, translate a
 * file block to a disk block, read the block under the seek pointer, and
 * look a name up in the current directory.
 */
321 static int 	read_inode(ino_t inumber, struct open_file *f);
322 static int	block_map(struct open_file *f, daddr_t file_block,
323 		    daddr_t *disk_block_p);
324 static int	buf_read_file(struct open_file *f, char **buf_p,
325 		    size_t *size_p);
326 static int	search_directory(char *name, struct open_file *f,
327 		    ino_t *inumber_p);
328 
329 /*
330  * Open a file.
331  */
332 static int
333 ext2fs_open(const char *upath, struct open_file *f)
334 {
335 	struct file *fp;
336 	struct ext2fs *fs;
337 	size_t buf_size;
338 	ino_t inumber, parent_inumber;
339 	int i, len, groups, bg_per_blk, blkgrps, mult;
340 	int nlinks = 0;
341 	int error = 0;
342 	char *cp, *ncp, *path = NULL, *buf = NULL;
343 	char namebuf[MAXPATHLEN+1];
344 	char c;
345 
346 	/* allocate file system specific data structure */
347 	fp = malloc(sizeof(struct file));
348 	if (fp == NULL)
349 		return (ENOMEM);
350 	bzero(fp, sizeof(struct file));
351 	f->f_fsdata = (void *)fp;
352 
353 	/* allocate space and read super block */
354 	fs = (struct ext2fs *)malloc(sizeof(*fs));
355 	fp->f_fs = fs;
356 	twiddle(1);
357 	error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
358 	    EXT2_SBLOCK, EXT2_SBSIZE, (char *)fs, &buf_size);
359 	if (error)
360 		goto out;
361 
362 	if (buf_size != EXT2_SBSIZE || fs->fs_magic != EXT2_MAGIC) {
363 		error = EINVAL;
364 		goto out;
365 	}
366 
367 	/*
368 	 * compute in-core values for the superblock
369 	 */
370 	fs->fs_bshift = EXT2_MINBSHIFT + fs->fs_fd.fd_bsize;
371 	fs->fs_bsize = 1 << fs->fs_bshift;
372 	fs->fs_bmask = fs->fs_bsize - 1;
373 
374 	fs->fs_fshift = EXT2_MINFSHIFT + fs->fs_fd.fd_fsize;
375 	fs->fs_fsize = 1 << fs->fs_fshift;
376 	fs->fs_fmask = fs->fs_fsize - 1;
377 
378 	if (fs->fs_revision == EXT2_REV0) {
379 		fs->fs_isize = EXT2_R0_ISIZE;
380 		fs->fs_firstino = EXT2_R0_FIRSTINO;
381 	} else {
382 		fs->fs_isize = fs->fs_fd.fd_isize;
383 		fs->fs_firstino = fs->fs_fd.fd_firstino;
384 	}
385 	fs->fs_imask = fs->fs_isize - 1;
386 	fs->fs_ipb = fs->fs_bsize / fs->fs_isize;
387 	fs->fs_fsbtodb = (fs->fs_bsize / DEV_BSIZE) - 1;
388 
389 	/*
390 	 * we have to load in the "group descriptors" here
391 	 */
392 	groups = howmany(fs->fs_blocks - fs->fs_firstblk, fs->fs_bpg);
393 	bg_per_blk = fs->fs_bsize / sizeof(struct ext2blkgrp);
394 	blkgrps = howmany(groups, bg_per_blk);
395 	len = blkgrps * fs->fs_bsize;
396 
397 	fp->f_bg = malloc(len);
398 	twiddle(1);
399 	error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
400 	    EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, len,
401 	    (char *)fp->f_bg, &buf_size);
402 	if (error)
403 		goto out;
404 
405 	/*
406 	 * XXX
407 	 * validation of values?  (blocksize, descriptors, etc?)
408 	 */
409 
410 	/*
411 	 * Calculate indirect block levels.
412 	 */
413 	mult = 1;
414 	for (i = 0; i < EXT2_NIADDR; i++) {
415 		mult *= nindir(fs);
416 		fp->f_nindir[i] = mult;
417 	}
418 
419 	inumber = EXT2_ROOTINO;
420 	if ((error = read_inode(inumber, f)) != 0)
421 		goto out;
422 
423 	path = strdup(upath);
424 	if (path == NULL) {
425 		error = ENOMEM;
426 		goto out;
427 	}
428 	cp = path;
429 	while (*cp) {
430 		/*
431 		 * Remove extra separators
432 		 */
433 		while (*cp == '/')
434 			cp++;
435 		if (*cp == '\0')
436 			break;
437 
438 		/*
439 		 * Check that current node is a directory.
440 		 */
441 		if (! S_ISDIR(fp->f_di.di_mode)) {
442 			error = ENOTDIR;
443 			goto out;
444 		}
445 
446 		/*
447 		 * Get next component of path name.
448 		 */
449 		len = 0;
450 
451 		ncp = cp;
452 		while ((c = *cp) != '\0' && c != '/') {
453 			if (++len > EXT2_MAXNAMLEN) {
454 				error = ENOENT;
455 				goto out;
456 			}
457 			cp++;
458 		}
459 		*cp = '\0';
460 
461 		/*
462 		 * Look up component in current directory.
463 		 * Save directory inumber in case we find a
464 		 * symbolic link.
465 		 */
466 		parent_inumber = inumber;
467 		error = search_directory(ncp, f, &inumber);
468 		*cp = c;
469 		if (error)
470 			goto out;
471 
472 		/*
473 		 * Open next component.
474 		 */
475 		if ((error = read_inode(inumber, f)) != 0)
476 			goto out;
477 
478 		/*
479 		 * Check for symbolic link.
480 		 */
481 		if (S_ISLNK(fp->f_di.di_mode)) {
482 			int link_len = fp->f_di.di_size;
483 			int len;
484 
485 			len = strlen(cp);
486 			if (link_len + len > MAXPATHLEN ||
487 			    ++nlinks > MAXSYMLINKS) {
488 				error = ENOENT;
489 				goto out;
490 			}
491 
492 			bcopy(cp, &namebuf[link_len], len + 1);
493 			if (fp->f_di.di_nblk == 0) {
494 				bcopy(fp->f_di.di_shortlink,
495 				    namebuf, link_len);
496 			} else {
497 				/*
498 				 * Read file for symbolic link
499 				 */
500 				struct ext2fs *fs = fp->f_fs;
501 				daddr_t	disk_block;
502 				size_t buf_size;
503 
504 				if (! buf)
505 					buf = malloc(fs->fs_bsize);
506 				error = block_map(f, (daddr_t)0, &disk_block);
507 				if (error)
508 					goto out;
509 
510 				twiddle(1);
511 				error = (f->f_dev->dv_strategy)(f->f_devdata,
512 				    F_READ, fsb_to_db(fs, disk_block),
513 				    fs->fs_bsize, buf, &buf_size);
514 				if (error)
515 					goto out;
516 
517 				bcopy((char *)buf, namebuf, link_len);
518 			}
519 
520 			/*
521 			 * If relative pathname, restart at parent directory.
522 			 * If absolute pathname, restart at root.
523 			 */
524 			cp = namebuf;
525 			if (*cp != '/')
526 				inumber = parent_inumber;
527 			else
528 				inumber = (ino_t)EXT2_ROOTINO;
529 
530 			if ((error = read_inode(inumber, f)) != 0)
531 				goto out;
532 		}
533 	}
534 
535 	/*
536 	 * Found terminal component.
537 	 */
538 	error = 0;
539 	fp->f_seekp = 0;
540 out:
541 	if (buf)
542 		free(buf);
543 	if (path)
544 		free(path);
545 	if (error) {
546 		if (fp->f_buf)
547 			free(fp->f_buf);
548 		free(fp->f_fs);
549 		free(fp);
550 	}
551 	return (error);
552 }
553 
554 /*
555  * Read a new inode into a file structure.
556  */
557 static int
558 read_inode(ino_t inumber, struct open_file *f)
559 {
560 	struct file *fp = (struct file *)f->f_fsdata;
561 	struct ext2fs *fs = fp->f_fs;
562 	struct ext2dinode *dp;
563 	char *buf;
564 	size_t rsize;
565 	int level, error = 0;
566 
567 	/*
568 	 * Read inode and save it.
569 	 */
570 	buf = malloc(fs->fs_bsize);
571 	twiddle(1);
572 	error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
573 	    ino_to_db(fs, fp->f_bg, inumber), fs->fs_bsize, buf, &rsize);
574 	if (error)
575 		goto out;
576 	if (rsize != fs->fs_bsize) {
577 		error = EIO;
578 		goto out;
579 	}
580 
581 	dp = (struct ext2dinode *)buf;
582 	fp->f_di = dp[ino_to_bo(fs, inumber)];
583 
584 	/* clear out old buffers */
585 	for (level = 0; level < EXT2_NIADDR; level++)
586 		fp->f_blkno[level] = -1;
587 	fp->f_buf_blkno = -1;
588 	fp->f_seekp = 0;
589 
590 out:
591 	free(buf);
592 	return (error);
593 }
594 
595 /*
596  * Given an offset in a file, find the disk block number that
597  * contains that block.
598  */
599 static int
600 block_map(struct open_file *f, daddr_t file_block, daddr_t *disk_block_p)
601 {
602 	struct file *fp = (struct file *)f->f_fsdata;
603 	struct ext2fs *fs = fp->f_fs;
604 	daddr_t ind_block_num;
605 	int32_t *ind_p;
606 	int idx, level;
607 	int error;
608 
609 	/*
610 	 * Index structure of an inode:
611 	 *
612 	 * di_db[0..EXT2_NDADDR-1] hold block numbers for blocks
613 	 *			0..EXT2_NDADDR-1
614 	 *
615 	 * di_ib[0]		index block 0 is the single indirect block
616 	 *			holds block numbers for blocks
617 	 *			EXT2_NDADDR .. EXT2_NDADDR + NINDIR(fs)-1
618 	 *
619 	 * di_ib[1]		index block 1 is the double indirect block
620 	 *			holds block numbers for INDEX blocks for blocks
621 	 *			EXT2_NDADDR + NINDIR(fs) ..
622 	 *			EXT2_NDADDR + NINDIR(fs) + NINDIR(fs)**2 - 1
623 	 *
624 	 * di_ib[2]		index block 2 is the triple indirect block
625 	 *			holds block numbers for double-indirect
626 	 *			blocks for blocks
627 	 *			EXT2_NDADDR + NINDIR(fs) + NINDIR(fs)**2 ..
628 	 *			EXT2_NDADDR + NINDIR(fs) + NINDIR(fs)**2
629 	 *				+ NINDIR(fs)**3 - 1
630 	 */
631 
632 	if (file_block < EXT2_NDADDR) {
633 		/* Direct block. */
634 		*disk_block_p = fp->f_di.di_db[file_block];
635 		return (0);
636 	}
637 
638 	file_block -= EXT2_NDADDR;
639 
640 	/*
641 	 * nindir[0] = NINDIR
642 	 * nindir[1] = NINDIR**2
643 	 * nindir[2] = NINDIR**3
644 	 *	etc
645 	 */
646 	for (level = 0; level < EXT2_NIADDR; level++) {
647 		if (file_block < fp->f_nindir[level])
648 			break;
649 		file_block -= fp->f_nindir[level];
650 	}
651 	if (level == EXT2_NIADDR) {
652 		/* Block number too high */
653 		return (EFBIG);
654 	}
655 
656 	ind_block_num = fp->f_di.di_ib[level];
657 
658 	for (; level >= 0; level--) {
659 		if (ind_block_num == 0) {
660 			*disk_block_p = 0;	/* missing */
661 			return (0);
662 		}
663 
664 		if (fp->f_blkno[level] != ind_block_num) {
665 			if (fp->f_blk[level] == (char *)0)
666 				fp->f_blk[level] =
667 					malloc(fs->fs_bsize);
668 			twiddle(1);
669 			error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
670 			    fsb_to_db(fp->f_fs, ind_block_num), fs->fs_bsize,
671 			    fp->f_blk[level], &fp->f_blksize[level]);
672 			if (error)
673 				return (error);
674 			if (fp->f_blksize[level] != fs->fs_bsize)
675 				return (EIO);
676 			fp->f_blkno[level] = ind_block_num;
677 		}
678 
679 		ind_p = (int32_t *)fp->f_blk[level];
680 
681 		if (level > 0) {
682 			idx = file_block / fp->f_nindir[level - 1];
683 			file_block %= fp->f_nindir[level - 1];
684 		} else {
685 			idx = file_block;
686 		}
687 		ind_block_num = ind_p[idx];
688 	}
689 
690 	*disk_block_p = ind_block_num;
691 
692 	return (0);
693 }
694 
695 /*
696  * Read a portion of a file into an internal buffer.  Return
697  * the location in the buffer and the amount in the buffer.
698  */
699 static int
700 buf_read_file(struct open_file *f, char **buf_p, size_t *size_p)
701 {
702 	struct file *fp = (struct file *)f->f_fsdata;
703 	struct ext2fs *fs = fp->f_fs;
704 	long off;
705 	daddr_t file_block;
706 	daddr_t	disk_block;
707 	size_t block_size;
708 	int error = 0;
709 
710 	off = blkoff(fs, fp->f_seekp);
711 	file_block = lblkno(fs, fp->f_seekp);
712 	block_size = dblksize(fs, &fp->f_di, file_block);
713 
714 	if (file_block != fp->f_buf_blkno) {
715 		error = block_map(f, file_block, &disk_block);
716 		if (error)
717 			goto done;
718 
719 		if (fp->f_buf == (char *)0)
720 			fp->f_buf = malloc(fs->fs_bsize);
721 
722 		if (disk_block == 0) {
723 			bzero(fp->f_buf, block_size);
724 			fp->f_buf_size = block_size;
725 		} else {
726 			twiddle(4);
727 			error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
728 			    fsb_to_db(fs, disk_block), block_size,
729 			    fp->f_buf, &fp->f_buf_size);
730 			if (error)
731 				goto done;
732 		}
733 		fp->f_buf_blkno = file_block;
734 	}
735 
736 	/*
737 	 * Return address of byte in buffer corresponding to
738 	 * offset, and size of remainder of buffer after that
739 	 * byte.
740 	 */
741 	*buf_p = fp->f_buf + off;
742 	*size_p = block_size - off;
743 
744 	/*
745 	 * But truncate buffer at end of file.
746 	 */
747 	if (*size_p > fp->f_di.di_size - fp->f_seekp)
748 		*size_p = fp->f_di.di_size - fp->f_seekp;
749 done:
750 	return (error);
751 }
752 
753 /*
754  * Search a directory for a name and return its
755  * i_number.
756  */
757 static int
758 search_directory(char *name, struct open_file *f, ino_t *inumber_p)
759 {
760 	struct file *fp = (struct file *)f->f_fsdata;
761 	struct ext2dirent *dp, *edp;
762 	char *buf;
763 	size_t buf_size;
764 	int namlen, length;
765 	int error;
766 
767 	length = strlen(name);
768 	fp->f_seekp = 0;
769 	while (fp->f_seekp < fp->f_di.di_size) {
770 		error = buf_read_file(f, &buf, &buf_size);
771 		if (error)
772 			return (error);
773 		dp = (struct ext2dirent *)buf;
774 		edp = (struct ext2dirent *)(buf + buf_size);
775 		while (dp < edp) {
776 			if (dp->d_ino == (ino_t)0)
777 				goto next;
778 			namlen = dp->d_namlen;
779 			if (namlen == length &&
780 			    strncmp(name, dp->d_name, length) == 0) {
781 				/* found entry */
782 				*inumber_p = dp->d_ino;
783 				return (0);
784 			}
785 		next:
786 			dp = (struct ext2dirent *)((char *)dp + dp->d_reclen);
787 		}
788 		fp->f_seekp += buf_size;
789 	}
790 	return (ENOENT);
791 }
792 
793 static int
794 ext2fs_close(struct open_file *f)
795 {
796 	struct file *fp = (struct file *)f->f_fsdata;
797 	int level;
798 
799 	f->f_fsdata = (void *)0;
800 	if (fp == (struct file *)0)
801 		return (0);
802 
803 	for (level = 0; level < EXT2_NIADDR; level++) {
804 		if (fp->f_blk[level])
805 			free(fp->f_blk[level]);
806 	}
807 	if (fp->f_buf)
808 		free(fp->f_buf);
809 	if (fp->f_bg)
810 		free(fp->f_bg);
811 	free(fp->f_fs);
812 	free(fp);
813 	return (0);
814 }
815 
816 static int
817 ext2fs_read(struct open_file *f, void *addr, size_t size, size_t *resid)
818 {
819 	struct file *fp = (struct file *)f->f_fsdata;
820 	size_t csize, buf_size;
821 	char *buf;
822 	int error = 0;
823 
824 	while (size != 0) {
825 		if (fp->f_seekp >= fp->f_di.di_size)
826 			break;
827 
828 		error = buf_read_file(f, &buf, &buf_size);
829 		if (error)
830 			break;
831 
832 		csize = size;
833 		if (csize > buf_size)
834 			csize = buf_size;
835 
836 		bcopy(buf, addr, csize);
837 
838 		fp->f_seekp += csize;
839 		addr = (char *)addr + csize;
840 		size -= csize;
841 	}
842 	if (resid)
843 		*resid = size;
844 	return (error);
845 }
846 
847 static off_t
848 ext2fs_seek(struct open_file *f, off_t offset, int where)
849 {
850 	struct file *fp = (struct file *)f->f_fsdata;
851 
852 	switch (where) {
853 	case SEEK_SET:
854 		fp->f_seekp = offset;
855 		break;
856 	case SEEK_CUR:
857 		fp->f_seekp += offset;
858 		break;
859 	case SEEK_END:
860 		fp->f_seekp = fp->f_di.di_size - offset;
861 		break;
862 	default:
863 		errno = EINVAL;
864 		return (-1);
865 	}
866 	return (fp->f_seekp);
867 }
868 
869 static int
870 ext2fs_stat(struct open_file *f, struct stat *sb)
871 {
872 	struct file *fp = (struct file *)f->f_fsdata;
873 
874 	/* only important stuff */
875 	sb->st_mode = fp->f_di.di_mode;
876 	sb->st_uid = fp->f_di.di_uid;
877 	sb->st_gid = fp->f_di.di_gid;
878 	sb->st_size = fp->f_di.di_size;
879 	return (0);
880 }
881 
882 static int
883 ext2fs_readdir(struct open_file *f, struct dirent *d)
884 {
885 	struct file *fp = (struct file *)f->f_fsdata;
886 	struct ext2dirent *ed;
887 	char *buf;
888 	size_t buf_size;
889 	int error;
890 
891 	/*
892 	 * assume that a directory entry will not be split across blocks
893 	 */
894 again:
895 	if (fp->f_seekp >= fp->f_di.di_size)
896 		return (ENOENT);
897 	error = buf_read_file(f, &buf, &buf_size);
898 	if (error)
899 		return (error);
900 	ed = (struct ext2dirent *)buf;
901 	fp->f_seekp += ed->d_reclen;
902 	if (ed->d_ino == (ino_t)0)
903 		goto again;
904 	d->d_type = EXTFTODT(ed->d_type);
905 	strncpy(d->d_name, ed->d_name, ed->d_namlen);
906 	d->d_name[ed->d_namlen] = '\0';
907 	return (0);
908 }
909