xref: /linux/fs/jffs2/readinode.c (revision c537b994505099b7197e7d3125b942ecbcc51eb6)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: readinode.c,v 1.143 2005/11/07 11:14:41 gleixner Exp $
11  *
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/sched.h>
16 #include <linux/slab.h>
17 #include <linux/fs.h>
18 #include <linux/crc32.h>
19 #include <linux/pagemap.h>
20 #include <linux/mtd/mtd.h>
21 #include <linux/compiler.h>
22 #include "nodelist.h"
23 
24 /*
25  * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
26  * order of increasing version.
27  */
28 static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
29 {
30 	struct rb_node **p = &list->rb_node;
31 	struct rb_node * parent = NULL;
32 	struct jffs2_tmp_dnode_info *this;
33 
34 	while (*p) {
35 		parent = *p;
36 		this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
37 
38 		/* There may actually be a collision here, but it doesn't
39 		   actually matter. As long as the two nodes with the same
40 		   version are together, it's all fine. */
41 		if (tn->version > this->version)
42 			p = &(*p)->rb_left;
43 		else
44 			p = &(*p)->rb_right;
45 	}
46 
47 	rb_link_node(&tn->rb, parent, p);
48 	rb_insert_color(&tn->rb, list);
49 }
50 
51 static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
52 {
53 	struct rb_node *this;
54 	struct jffs2_tmp_dnode_info *tn;
55 
56 	this = list->rb_node;
57 
58 	/* Now at bottom of tree */
59 	while (this) {
60 		if (this->rb_left)
61 			this = this->rb_left;
62 		else if (this->rb_right)
63 			this = this->rb_right;
64 		else {
65 			tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
66 			jffs2_free_full_dnode(tn->fn);
67 			jffs2_free_tmp_dnode_info(tn);
68 
69 			this = rb_parent(this);
70 			if (!this)
71 				break;
72 
73 			if (this->rb_left == &tn->rb)
74 				this->rb_left = NULL;
75 			else if (this->rb_right == &tn->rb)
76 				this->rb_right = NULL;
77 			else BUG();
78 		}
79 	}
80 	list->rb_node = NULL;
81 }
82 
83 static void jffs2_free_full_dirent_list(struct jffs2_full_dirent *fd)
84 {
85 	struct jffs2_full_dirent *next;
86 
87 	while (fd) {
88 		next = fd->next;
89 		jffs2_free_full_dirent(fd);
90 		fd = next;
91 	}
92 }
93 
94 /* Returns first valid node after 'ref'. May return 'ref' */
95 static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_ref *ref)
96 {
97 	while (ref && ref->next_in_ino) {
98 		if (!ref_obsolete(ref))
99 			return ref;
100 		dbg_noderef("node at 0x%08x is obsoleted. Ignoring.\n", ref_offset(ref));
101 		ref = ref->next_in_ino;
102 	}
103 	return NULL;
104 }
105 
106 /*
107  * Helper function for jffs2_get_inode_nodes().
108  * It is called every time an directory entry node is found.
109  *
110  * Returns: 0 on succes;
111  * 	    1 if the node should be marked obsolete;
112  * 	    negative error code on failure.
113  */
114 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
115 				struct jffs2_raw_dirent *rd, size_t read, struct jffs2_full_dirent **fdp,
116 				uint32_t *latest_mctime, uint32_t *mctime_ver)
117 {
118 	struct jffs2_full_dirent *fd;
119 	uint32_t crc;
120 
121 	/* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
122 	BUG_ON(ref_obsolete(ref));
123 
124 	crc = crc32(0, rd, sizeof(*rd) - 8);
125 	if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
126 		JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n",
127 			     ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
128 		return 1;
129 	}
130 
131 	/* If we've never checked the CRCs on this node, check them now */
132 	if (ref_flags(ref) == REF_UNCHECKED) {
133 		struct jffs2_eraseblock *jeb;
134 		int len;
135 
136 		/* Sanity check */
137 		if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
138 			JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
139 				    ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
140 			return 1;
141 		}
142 
143 		jeb = &c->blocks[ref->flash_offset / c->sector_size];
144 		len = ref_totlen(c, jeb, ref);
145 
146 		spin_lock(&c->erase_completion_lock);
147 		jeb->used_size += len;
148 		jeb->unchecked_size -= len;
149 		c->used_size += len;
150 		c->unchecked_size -= len;
151 		ref->flash_offset = ref_offset(ref) | REF_PRISTINE;
152 		spin_unlock(&c->erase_completion_lock);
153 	}
154 
155 	fd = jffs2_alloc_full_dirent(rd->nsize + 1);
156 	if (unlikely(!fd))
157 		return -ENOMEM;
158 
159 	fd->raw = ref;
160 	fd->version = je32_to_cpu(rd->version);
161 	fd->ino = je32_to_cpu(rd->ino);
162 	fd->type = rd->type;
163 
164 	/* Pick out the mctime of the latest dirent */
165 	if(fd->version > *mctime_ver && je32_to_cpu(rd->mctime)) {
166 		*mctime_ver = fd->version;
167 		*latest_mctime = je32_to_cpu(rd->mctime);
168 	}
169 
170 	/*
171 	 * Copy as much of the name as possible from the raw
172 	 * dirent we've already read from the flash.
173 	 */
174 	if (read > sizeof(*rd))
175 		memcpy(&fd->name[0], &rd->name[0],
176 		       min_t(uint32_t, rd->nsize, (read - sizeof(*rd)) ));
177 
178 	/* Do we need to copy any more of the name directly from the flash? */
179 	if (rd->nsize + sizeof(*rd) > read) {
180 		/* FIXME: point() */
181 		int err;
182 		int already = read - sizeof(*rd);
183 
184 		err = jffs2_flash_read(c, (ref_offset(ref)) + read,
185 				rd->nsize - already, &read, &fd->name[already]);
186 		if (unlikely(read != rd->nsize - already) && likely(!err))
187 			return -EIO;
188 
189 		if (unlikely(err)) {
190 			JFFS2_ERROR("read remainder of name: error %d\n", err);
191 			jffs2_free_full_dirent(fd);
192 			return -EIO;
193 		}
194 	}
195 
196 	fd->nhash = full_name_hash(fd->name, rd->nsize);
197 	fd->next = NULL;
198 	fd->name[rd->nsize] = '\0';
199 
200 	/*
201 	 * Wheee. We now have a complete jffs2_full_dirent structure, with
202 	 * the name in it and everything. Link it into the list
203 	 */
204 	jffs2_add_fd_to_list(c, fd, fdp);
205 
206 	return 0;
207 }
208 
209 /*
210  * Helper function for jffs2_get_inode_nodes().
211  * It is called every time an inode node is found.
212  *
213  * Returns: 0 on succes;
214  * 	    1 if the node should be marked obsolete;
215  * 	    negative error code on failure.
216  */
217 static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
218 			     struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen,
219 			     uint32_t *latest_mctime, uint32_t *mctime_ver)
220 {
221 	struct jffs2_tmp_dnode_info *tn;
222 	uint32_t len, csize;
223 	int ret = 1;
224 	uint32_t crc;
225 
226 	/* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
227 	BUG_ON(ref_obsolete(ref));
228 
229 	crc = crc32(0, rd, sizeof(*rd) - 8);
230 	if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
231 		JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n",
232 			     ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
233 		return 1;
234 	}
235 
236 	tn = jffs2_alloc_tmp_dnode_info();
237 	if (!tn) {
238 		JFFS2_ERROR("failed to allocate tn (%zu bytes).\n", sizeof(*tn));
239 		return -ENOMEM;
240 	}
241 
242 	tn->partial_crc = 0;
243 	csize = je32_to_cpu(rd->csize);
244 
245 	/* If we've never checked the CRCs on this node, check them now */
246 	if (ref_flags(ref) == REF_UNCHECKED) {
247 
248 		/* Sanity checks */
249 		if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
250 		    unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
251 				JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
252 				jffs2_dbg_dump_node(c, ref_offset(ref));
253 			goto free_out;
254 		}
255 
256 		if (jffs2_is_writebuffered(c) && csize != 0) {
257 			/* At this point we are supposed to check the data CRC
258 			 * of our unchecked node. But thus far, we do not
259 			 * know whether the node is valid or obsolete. To
260 			 * figure this out, we need to walk all the nodes of
261 			 * the inode and build the inode fragtree. We don't
262 			 * want to spend time checking data of nodes which may
263 			 * later be found to be obsolete. So we put off the full
264 			 * data CRC checking until we have read all the inode
265 			 * nodes and have started building the fragtree.
266 			 *
267 			 * The fragtree is being built starting with nodes
268 			 * having the highest version number, so we'll be able
269 			 * to detect whether a node is valid (i.e., it is not
270 			 * overlapped by a node with higher version) or not.
271 			 * And we'll be able to check only those nodes, which
272 			 * are not obsolete.
273 			 *
274 			 * Of course, this optimization only makes sense in case
275 			 * of NAND flashes (or other flashes whith
276 			 * !jffs2_can_mark_obsolete()), since on NOR flashes
277 			 * nodes are marked obsolete physically.
278 			 *
279 			 * Since NAND flashes (or other flashes with
280 			 * jffs2_is_writebuffered(c)) are anyway read by
281 			 * fractions of c->wbuf_pagesize, and we have just read
282 			 * the node header, it is likely that the starting part
283 			 * of the node data is also read when we read the
284 			 * header. So we don't mind to check the CRC of the
285 			 * starting part of the data of the node now, and check
286 			 * the second part later (in jffs2_check_node_data()).
287 			 * Of course, we will not need to re-read and re-check
288 			 * the NAND page which we have just read. This is why we
289 			 * read the whole NAND page at jffs2_get_inode_nodes(),
290 			 * while we needed only the node header.
291 			 */
292 			unsigned char *buf;
293 
294 			/* 'buf' will point to the start of data */
295 			buf = (unsigned char *)rd + sizeof(*rd);
296 			/* len will be the read data length */
297 			len = min_t(uint32_t, rdlen - sizeof(*rd), csize);
298 			tn->partial_crc = crc32(0, buf, len);
299 
300 			dbg_readinode("Calculates CRC (%#08x) for %d bytes, csize %d\n", tn->partial_crc, len, csize);
301 
302 			/* If we actually calculated the whole data CRC
303 			 * and it is wrong, drop the node. */
304 			if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
305 				JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
306 					ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
307 				goto free_out;
308 			}
309 
310 		} else if (csize == 0) {
311 			/*
312 			 * We checked the header CRC. If the node has no data, adjust
313 			 * the space accounting now. For other nodes this will be done
314 			 * later either when the node is marked obsolete or when its
315 			 * data is checked.
316 			 */
317 			struct jffs2_eraseblock *jeb;
318 
319 			dbg_readinode("the node has no data.\n");
320 			jeb = &c->blocks[ref->flash_offset / c->sector_size];
321 			len = ref_totlen(c, jeb, ref);
322 
323 			spin_lock(&c->erase_completion_lock);
324 			jeb->used_size += len;
325 			jeb->unchecked_size -= len;
326 			c->used_size += len;
327 			c->unchecked_size -= len;
328 			ref->flash_offset = ref_offset(ref) | REF_NORMAL;
329 			spin_unlock(&c->erase_completion_lock);
330 		}
331 	}
332 
333 	tn->fn = jffs2_alloc_full_dnode();
334 	if (!tn->fn) {
335 		JFFS2_ERROR("alloc fn failed\n");
336 		ret = -ENOMEM;
337 		goto free_out;
338 	}
339 
340 	tn->version = je32_to_cpu(rd->version);
341 	tn->fn->ofs = je32_to_cpu(rd->offset);
342 	tn->data_crc = je32_to_cpu(rd->data_crc);
343 	tn->csize = csize;
344 	tn->fn->raw = ref;
345 
346 	/* There was a bug where we wrote hole nodes out with
347 	   csize/dsize swapped. Deal with it */
348 	if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && csize)
349 		tn->fn->size = csize;
350 	else // normal case...
351 		tn->fn->size = je32_to_cpu(rd->dsize);
352 
353 	dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
354 		  ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
355 
356 	jffs2_add_tn_to_tree(tn, tnp);
357 
358 	return 0;
359 
360 free_out:
361 	jffs2_free_tmp_dnode_info(tn);
362 	return ret;
363 }
364 
365 /*
366  * Helper function for jffs2_get_inode_nodes().
367  * It is called every time an unknown node is found.
368  *
369  * Returns: 0 on success;
370  * 	    1 if the node should be marked obsolete;
371  * 	    negative error code on failure.
372  */
373 static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
374 {
375 	/* We don't mark unknown nodes as REF_UNCHECKED */
376 	if (ref_flags(ref) == REF_UNCHECKED) {
377 		JFFS2_ERROR("REF_UNCHECKED but unknown node at %#08x\n",
378 			    ref_offset(ref));
379 		JFFS2_ERROR("Node is {%04x,%04x,%08x,%08x}. Please report this error.\n",
380                             je16_to_cpu(un->magic), je16_to_cpu(un->nodetype),
381                             je32_to_cpu(un->totlen), je32_to_cpu(un->hdr_crc));
382 		return 1;
383 	}
384 
385 	un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
386 
387 	switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) {
388 
389 	case JFFS2_FEATURE_INCOMPAT:
390 		JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n",
391 			    je16_to_cpu(un->nodetype), ref_offset(ref));
392 		/* EEP */
393 		BUG();
394 		break;
395 
396 	case JFFS2_FEATURE_ROCOMPAT:
397 		JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n",
398 			    je16_to_cpu(un->nodetype), ref_offset(ref));
399 		BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO));
400 		break;
401 
402 	case JFFS2_FEATURE_RWCOMPAT_COPY:
403 		JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n",
404 			     je16_to_cpu(un->nodetype), ref_offset(ref));
405 		break;
406 
407 	case JFFS2_FEATURE_RWCOMPAT_DELETE:
408 		JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
409 			     je16_to_cpu(un->nodetype), ref_offset(ref));
410 		return 1;
411 	}
412 
413 	return 0;
414 }
415 
416 /*
417  * Helper function for jffs2_get_inode_nodes().
418  * The function detects whether more data should be read and reads it if yes.
419  *
420  * Returns: 0 on succes;
421  * 	    negative error code on failure.
422  */
423 static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
424 		     int right_size, int *rdlen, unsigned char *buf, unsigned char *bufstart)
425 {
426 	int right_len, err, len;
427 	size_t retlen;
428 	uint32_t offs;
429 
430 	if (jffs2_is_writebuffered(c)) {
431 		right_len = c->wbuf_pagesize - (bufstart - buf);
432 		if (right_size + (int)(bufstart - buf) > c->wbuf_pagesize)
433 			right_len += c->wbuf_pagesize;
434 	} else
435 		right_len = right_size;
436 
437 	if (*rdlen == right_len)
438 		return 0;
439 
440 	/* We need to read more data */
441 	offs = ref_offset(ref) + *rdlen;
442 	if (jffs2_is_writebuffered(c)) {
443 		bufstart = buf + c->wbuf_pagesize;
444 		len = c->wbuf_pagesize;
445 	} else {
446 		bufstart = buf + *rdlen;
447 		len = right_size - *rdlen;
448 	}
449 
450 	dbg_readinode("read more %d bytes\n", len);
451 
452 	err = jffs2_flash_read(c, offs, len, &retlen, bufstart);
453 	if (err) {
454 		JFFS2_ERROR("can not read %d bytes from 0x%08x, "
455 			"error code: %d.\n", len, offs, err);
456 		return err;
457 	}
458 
459 	if (retlen < len) {
460 		JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n",
461 				offs, retlen, len);
462 		return -EIO;
463 	}
464 
465 	*rdlen = right_len;
466 
467 	return 0;
468 }
469 
470 /* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
471    with this ino, returning the former in order of version */
472 static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
473 				 struct rb_root *tnp, struct jffs2_full_dirent **fdp,
474 				 uint32_t *highest_version, uint32_t *latest_mctime,
475 				 uint32_t *mctime_ver)
476 {
477 	struct jffs2_raw_node_ref *ref, *valid_ref;
478 	struct rb_root ret_tn = RB_ROOT;
479 	struct jffs2_full_dirent *ret_fd = NULL;
480 	unsigned char *buf = NULL;
481 	union jffs2_node_union *node;
482 	size_t retlen;
483 	int len, err;
484 
485 	*mctime_ver = 0;
486 
487 	dbg_readinode("ino #%u\n", f->inocache->ino);
488 
489 	if (jffs2_is_writebuffered(c)) {
490 		/*
491 		 * If we have the write buffer, we assume the minimal I/O unit
492 		 * is c->wbuf_pagesize. We implement some optimizations which in
493 		 * this case and we need a temporary buffer of size =
494 		 * 2*c->wbuf_pagesize bytes (see comments in read_dnode()).
495 		 * Basically, we want to read not only the node header, but the
496 		 * whole wbuf (NAND page in case of NAND) or 2, if the node
497 		 * header overlaps the border between the 2 wbufs.
498 		 */
499 		len = 2*c->wbuf_pagesize;
500 	} else {
501 		/*
502 		 * When there is no write buffer, the size of the temporary
503 		 * buffer is the size of the larges node header.
504 		 */
505 		len = sizeof(union jffs2_node_union);
506 	}
507 
508 	/* FIXME: in case of NOR and available ->point() this
509 	 * needs to be fixed. */
510 	buf = kmalloc(len, GFP_KERNEL);
511 	if (!buf)
512 		return -ENOMEM;
513 
514 	spin_lock(&c->erase_completion_lock);
515 	valid_ref = jffs2_first_valid_node(f->inocache->nodes);
516 	if (!valid_ref && f->inocache->ino != 1)
517 		JFFS2_WARNING("Eep. No valid nodes for ino #%u.\n", f->inocache->ino);
518 	while (valid_ref) {
519 		unsigned char *bufstart;
520 
521 		/* We can hold a pointer to a non-obsolete node without the spinlock,
522 		   but _obsolete_ nodes may disappear at any time, if the block
523 		   they're in gets erased. So if we mark 'ref' obsolete while we're
524 		   not holding the lock, it can go away immediately. For that reason,
525 		   we find the next valid node first, before processing 'ref'.
526 		*/
527 		ref = valid_ref;
528 		valid_ref = jffs2_first_valid_node(ref->next_in_ino);
529 		spin_unlock(&c->erase_completion_lock);
530 
531 		cond_resched();
532 
533 		/*
534 		 * At this point we don't know the type of the node we're going
535 		 * to read, so we do not know the size of its header. In order
536 		 * to minimize the amount of flash IO we assume the node has
537 		 * size = JFFS2_MIN_NODE_HEADER.
538 		 */
539 		if (jffs2_is_writebuffered(c)) {
540 			/*
541 			 * We treat 'buf' as 2 adjacent wbufs. We want to
542 			 * adjust bufstart such as it points to the
543 			 * beginning of the node within this wbuf.
544 			 */
545 			bufstart = buf + (ref_offset(ref) % c->wbuf_pagesize);
546 			/* We will read either one wbuf or 2 wbufs. */
547 			len = c->wbuf_pagesize - (bufstart - buf);
548 			if (JFFS2_MIN_NODE_HEADER + (int)(bufstart - buf) > c->wbuf_pagesize) {
549 				/* The header spans the border of the first wbuf */
550 				len += c->wbuf_pagesize;
551 			}
552 		} else {
553 			bufstart = buf;
554 			len = JFFS2_MIN_NODE_HEADER;
555 		}
556 
557 		dbg_readinode("read %d bytes at %#08x(%d).\n", len, ref_offset(ref), ref_flags(ref));
558 
559 		/* FIXME: point() */
560 		err = jffs2_flash_read(c, ref_offset(ref), len,
561 				       &retlen, bufstart);
562 		if (err) {
563 			JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
564 			goto free_out;
565 		}
566 
567 		if (retlen < len) {
568 			JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n", ref_offset(ref), retlen, len);
569 			err = -EIO;
570 			goto free_out;
571 		}
572 
573 		node = (union jffs2_node_union *)bufstart;
574 
575 		/* No need to mask in the valid bit; it shouldn't be invalid */
576 		if (je32_to_cpu(node->u.hdr_crc) != crc32(0, node, sizeof(node->u)-4)) {
577 			JFFS2_NOTICE("Node header CRC failed at %#08x. {%04x,%04x,%08x,%08x}\n",
578 				     ref_offset(ref), je16_to_cpu(node->u.magic),
579 				     je16_to_cpu(node->u.nodetype),
580 				     je32_to_cpu(node->u.totlen),
581 				     je32_to_cpu(node->u.hdr_crc));
582 			jffs2_dbg_dump_node(c, ref_offset(ref));
583 			jffs2_mark_node_obsolete(c, ref);
584 			goto cont;
585 		}
586 		/* Due to poor choice of crc32 seed, an all-zero node will have a correct CRC */
587 		if (!je32_to_cpu(node->u.hdr_crc) && !je16_to_cpu(node->u.nodetype) &&
588 		    !je16_to_cpu(node->u.magic) && !je32_to_cpu(node->u.totlen)) {
589 			JFFS2_NOTICE("All zero node header at %#08x.\n", ref_offset(ref));
590 			jffs2_mark_node_obsolete(c, ref);
591 			goto cont;
592 		}
593 
594 		switch (je16_to_cpu(node->u.nodetype)) {
595 
596 		case JFFS2_NODETYPE_DIRENT:
597 
598 			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
599 				err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf, bufstart);
600 				if (unlikely(err))
601 					goto free_out;
602 			}
603 
604 			err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver);
605 			if (err == 1) {
606 				jffs2_mark_node_obsolete(c, ref);
607 				break;
608 			} else if (unlikely(err))
609 				goto free_out;
610 
611 			if (je32_to_cpu(node->d.version) > *highest_version)
612 				*highest_version = je32_to_cpu(node->d.version);
613 
614 			break;
615 
616 		case JFFS2_NODETYPE_INODE:
617 
618 			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
619 				err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf, bufstart);
620 				if (unlikely(err))
621 					goto free_out;
622 			}
623 
624 			err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver);
625 			if (err == 1) {
626 				jffs2_mark_node_obsolete(c, ref);
627 				break;
628 			} else if (unlikely(err))
629 				goto free_out;
630 
631 			if (je32_to_cpu(node->i.version) > *highest_version)
632 				*highest_version = je32_to_cpu(node->i.version);
633 
634 			break;
635 
636 		default:
637 			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
638 				err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf, bufstart);
639 				if (unlikely(err))
640 					goto free_out;
641 			}
642 
643 			err = read_unknown(c, ref, &node->u);
644 			if (err == 1) {
645 				jffs2_mark_node_obsolete(c, ref);
646 				break;
647 			} else if (unlikely(err))
648 				goto free_out;
649 
650 		}
651 	cont:
652 		spin_lock(&c->erase_completion_lock);
653 	}
654 
655 	spin_unlock(&c->erase_completion_lock);
656 	*tnp = ret_tn;
657 	*fdp = ret_fd;
658 	kfree(buf);
659 
660 	dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
661 			f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver);
662 	return 0;
663 
664  free_out:
665 	jffs2_free_tmp_dnode_info_list(&ret_tn);
666 	jffs2_free_full_dirent_list(ret_fd);
667 	kfree(buf);
668 	return err;
669 }
670 
671 static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
672 					struct jffs2_inode_info *f,
673 					struct jffs2_raw_inode *latest_node)
674 {
675 	struct jffs2_tmp_dnode_info *tn;
676 	struct rb_root tn_list;
677 	struct rb_node *rb, *repl_rb;
678 	struct jffs2_full_dirent *fd_list;
679 	struct jffs2_full_dnode *fn, *first_fn = NULL;
680 	uint32_t crc;
681 	uint32_t latest_mctime, mctime_ver;
682 	size_t retlen;
683 	int ret;
684 
685 	dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
686 
687 	/* Grab all nodes relevant to this ino */
688 	ret = jffs2_get_inode_nodes(c, f, &tn_list, &fd_list, &f->highest_version, &latest_mctime, &mctime_ver);
689 
690 	if (ret) {
691 		JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret);
692 		if (f->inocache->state == INO_STATE_READING)
693 			jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
694 		return ret;
695 	}
696 	f->dents = fd_list;
697 
698 	rb = rb_first(&tn_list);
699 
700 	while (rb) {
701 		cond_resched();
702 		tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
703 		fn = tn->fn;
704 		ret = 1;
705 		dbg_readinode("consider node ver %u, phys offset "
706 			"%#08x(%d), range %u-%u.\n", tn->version,
707 			ref_offset(fn->raw), ref_flags(fn->raw),
708 			fn->ofs, fn->ofs + fn->size);
709 
710 		if (fn->size) {
711 			ret = jffs2_add_older_frag_to_fragtree(c, f, tn);
712 			/* TODO: the error code isn't checked, check it */
713 			jffs2_dbg_fragtree_paranoia_check_nolock(f);
714 			BUG_ON(ret < 0);
715 			if (!first_fn && ret == 0)
716 				first_fn = fn;
717 		} else if (!first_fn) {
718 			first_fn = fn;
719 			f->metadata = fn;
720 			ret = 0; /* Prevent freeing the metadata update node */
721 		} else
722 			jffs2_mark_node_obsolete(c, fn->raw);
723 
724 		BUG_ON(rb->rb_left);
725 		if (rb_parent(rb) && rb_parent(rb)->rb_left == rb) {
726 			/* We were then left-hand child of our parent. We need
727 			 * to move our own right-hand child into our place. */
728 			repl_rb = rb->rb_right;
729 			if (repl_rb)
730 				rb_set_parent(repl_rb, rb_parent(rb));
731 		} else
732 			repl_rb = NULL;
733 
734 		rb = rb_next(rb);
735 
736 		/* Remove the spent tn from the tree; don't bother rebalancing
737 		 * but put our right-hand child in our own place. */
738 		if (rb_parent(&tn->rb)) {
739 			if (rb_parent(&tn->rb)->rb_left == &tn->rb)
740 				rb_parent(&tn->rb)->rb_left = repl_rb;
741 			else if (rb_parent(&tn->rb)->rb_right == &tn->rb)
742 				rb_parent(&tn->rb)->rb_right = repl_rb;
743 			else BUG();
744 		} else if (tn->rb.rb_right)
745 			rb_set_parent(tn->rb.rb_right, NULL);
746 
747 		jffs2_free_tmp_dnode_info(tn);
748 		if (ret) {
749 			dbg_readinode("delete dnode %u-%u.\n",
750 				fn->ofs, fn->ofs + fn->size);
751 			jffs2_free_full_dnode(fn);
752 		}
753 	}
754 	jffs2_dbg_fragtree_paranoia_check_nolock(f);
755 
756 	BUG_ON(first_fn && ref_obsolete(first_fn->raw));
757 
758 	fn = first_fn;
759 	if (unlikely(!first_fn)) {
760 		/* No data nodes for this inode. */
761 		if (f->inocache->ino != 1) {
762 			JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);
763 			if (!fd_list) {
764 				if (f->inocache->state == INO_STATE_READING)
765 					jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
766 				return -EIO;
767 			}
768 			JFFS2_NOTICE("but it has children so we fake some modes for it\n");
769 		}
770 		latest_node->mode = cpu_to_jemode(S_IFDIR|S_IRUGO|S_IWUSR|S_IXUGO);
771 		latest_node->version = cpu_to_je32(0);
772 		latest_node->atime = latest_node->ctime = latest_node->mtime = cpu_to_je32(0);
773 		latest_node->isize = cpu_to_je32(0);
774 		latest_node->gid = cpu_to_je16(0);
775 		latest_node->uid = cpu_to_je16(0);
776 		if (f->inocache->state == INO_STATE_READING)
777 			jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
778 		return 0;
779 	}
780 
781 	ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(*latest_node), &retlen, (void *)latest_node);
782 	if (ret || retlen != sizeof(*latest_node)) {
783 		JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
784 			ret, retlen, sizeof(*latest_node));
785 		/* FIXME: If this fails, there seems to be a memory leak. Find it. */
786 		up(&f->sem);
787 		jffs2_do_clear_inode(c, f);
788 		return ret?ret:-EIO;
789 	}
790 
791 	crc = crc32(0, latest_node, sizeof(*latest_node)-8);
792 	if (crc != je32_to_cpu(latest_node->node_crc)) {
793 		JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
794 			f->inocache->ino, ref_offset(fn->raw));
795 		up(&f->sem);
796 		jffs2_do_clear_inode(c, f);
797 		return -EIO;
798 	}
799 
800 	switch(jemode_to_cpu(latest_node->mode) & S_IFMT) {
801 	case S_IFDIR:
802 		if (mctime_ver > je32_to_cpu(latest_node->version)) {
803 			/* The times in the latest_node are actually older than
804 			   mctime in the latest dirent. Cheat. */
805 			latest_node->ctime = latest_node->mtime = cpu_to_je32(latest_mctime);
806 		}
807 		break;
808 
809 
810 	case S_IFREG:
811 		/* If it was a regular file, truncate it to the latest node's isize */
812 		jffs2_truncate_fragtree(c, &f->fragtree, je32_to_cpu(latest_node->isize));
813 		break;
814 
815 	case S_IFLNK:
816 		/* Hack to work around broken isize in old symlink code.
817 		   Remove this when dwmw2 comes to his senses and stops
818 		   symlinks from being an entirely gratuitous special
819 		   case. */
820 		if (!je32_to_cpu(latest_node->isize))
821 			latest_node->isize = latest_node->dsize;
822 
823 		if (f->inocache->state != INO_STATE_CHECKING) {
824 			/* Symlink's inode data is the target path. Read it and
825 			 * keep in RAM to facilitate quick follow symlink
826 			 * operation. */
827 			f->target = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL);
828 			if (!f->target) {
829 				JFFS2_ERROR("can't allocate %d bytes of memory for the symlink target path cache\n", je32_to_cpu(latest_node->csize));
830 				up(&f->sem);
831 				jffs2_do_clear_inode(c, f);
832 				return -ENOMEM;
833 			}
834 
835 			ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node),
836 						je32_to_cpu(latest_node->csize), &retlen, (char *)f->target);
837 
838 			if (ret  || retlen != je32_to_cpu(latest_node->csize)) {
839 				if (retlen != je32_to_cpu(latest_node->csize))
840 					ret = -EIO;
841 				kfree(f->target);
842 				f->target = NULL;
843 				up(&f->sem);
844 				jffs2_do_clear_inode(c, f);
845 				return -ret;
846 			}
847 
848 			f->target[je32_to_cpu(latest_node->csize)] = '\0';
849 			dbg_readinode("symlink's target '%s' cached\n", f->target);
850 		}
851 
852 		/* fall through... */
853 
854 	case S_IFBLK:
855 	case S_IFCHR:
856 		/* Certain inode types should have only one data node, and it's
857 		   kept as the metadata node */
858 		if (f->metadata) {
859 			JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n",
860 			       f->inocache->ino, jemode_to_cpu(latest_node->mode));
861 			up(&f->sem);
862 			jffs2_do_clear_inode(c, f);
863 			return -EIO;
864 		}
865 		if (!frag_first(&f->fragtree)) {
866 			JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n",
867 			       f->inocache->ino, jemode_to_cpu(latest_node->mode));
868 			up(&f->sem);
869 			jffs2_do_clear_inode(c, f);
870 			return -EIO;
871 		}
872 		/* ASSERT: f->fraglist != NULL */
873 		if (frag_next(frag_first(&f->fragtree))) {
874 			JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n",
875 			       f->inocache->ino, jemode_to_cpu(latest_node->mode));
876 			/* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */
877 			up(&f->sem);
878 			jffs2_do_clear_inode(c, f);
879 			return -EIO;
880 		}
881 		/* OK. We're happy */
882 		f->metadata = frag_first(&f->fragtree)->node;
883 		jffs2_free_node_frag(frag_first(&f->fragtree));
884 		f->fragtree = RB_ROOT;
885 		break;
886 	}
887 	if (f->inocache->state == INO_STATE_READING)
888 		jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
889 
890 	return 0;
891 }
892 
893 /* Scan the list of all nodes present for this ino, build map of versions, etc. */
894 int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
895 			uint32_t ino, struct jffs2_raw_inode *latest_node)
896 {
897 	dbg_readinode("read inode #%u\n", ino);
898 
899  retry_inocache:
900 	spin_lock(&c->inocache_lock);
901 	f->inocache = jffs2_get_ino_cache(c, ino);
902 
903 	if (f->inocache) {
904 		/* Check its state. We may need to wait before we can use it */
905 		switch(f->inocache->state) {
906 		case INO_STATE_UNCHECKED:
907 		case INO_STATE_CHECKEDABSENT:
908 			f->inocache->state = INO_STATE_READING;
909 			break;
910 
911 		case INO_STATE_CHECKING:
912 		case INO_STATE_GC:
913 			/* If it's in either of these states, we need
914 			   to wait for whoever's got it to finish and
915 			   put it back. */
916 			dbg_readinode("waiting for ino #%u in state %d\n", ino, f->inocache->state);
917 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
918 			goto retry_inocache;
919 
920 		case INO_STATE_READING:
921 		case INO_STATE_PRESENT:
922 			/* Eep. This should never happen. It can
923 			happen if Linux calls read_inode() again
924 			before clear_inode() has finished though. */
925 			JFFS2_ERROR("Eep. Trying to read_inode #%u when it's already in state %d!\n", ino, f->inocache->state);
926 			/* Fail. That's probably better than allowing it to succeed */
927 			f->inocache = NULL;
928 			break;
929 
930 		default:
931 			BUG();
932 		}
933 	}
934 	spin_unlock(&c->inocache_lock);
935 
936 	if (!f->inocache && ino == 1) {
937 		/* Special case - no root inode on medium */
938 		f->inocache = jffs2_alloc_inode_cache();
939 		if (!f->inocache) {
940 			JFFS2_ERROR("cannot allocate inocache for root inode\n");
941 			return -ENOMEM;
942 		}
943 		dbg_readinode("creating inocache for root inode\n");
944 		memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
945 		f->inocache->ino = f->inocache->nlink = 1;
946 		f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
947 		f->inocache->state = INO_STATE_READING;
948 		jffs2_add_ino_cache(c, f->inocache);
949 	}
950 	if (!f->inocache) {
951 		JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino);
952 		return -ENOENT;
953 	}
954 
955 	return jffs2_do_read_inode_internal(c, f, latest_node);
956 }
957 
958 int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
959 {
960 	struct jffs2_raw_inode n;
961 	struct jffs2_inode_info *f = kzalloc(sizeof(*f), GFP_KERNEL);
962 	int ret;
963 
964 	if (!f)
965 		return -ENOMEM;
966 
967 	init_MUTEX_LOCKED(&f->sem);
968 	f->inocache = ic;
969 
970 	ret = jffs2_do_read_inode_internal(c, f, &n);
971 	if (!ret) {
972 		up(&f->sem);
973 		jffs2_do_clear_inode(c, f);
974 	}
975 	kfree (f);
976 	return ret;
977 }
978 
979 void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
980 {
981 	struct jffs2_full_dirent *fd, *fds;
982 	int deleted;
983 
984 	jffs2_clear_acl(f);
985 	jffs2_xattr_delete_inode(c, f->inocache);
986 	down(&f->sem);
987 	deleted = f->inocache && !f->inocache->nlink;
988 
989 	if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
990 		jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
991 
992 	if (f->metadata) {
993 		if (deleted)
994 			jffs2_mark_node_obsolete(c, f->metadata->raw);
995 		jffs2_free_full_dnode(f->metadata);
996 	}
997 
998 	jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
999 
1000 	if (f->target) {
1001 		kfree(f->target);
1002 		f->target = NULL;
1003 	}
1004 
1005 	fds = f->dents;
1006 	while(fds) {
1007 		fd = fds;
1008 		fds = fd->next;
1009 		jffs2_free_full_dirent(fd);
1010 	}
1011 
1012 	if (f->inocache && f->inocache->state != INO_STATE_CHECKING) {
1013 		jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
1014 		if (f->inocache->nodes == (void *)f->inocache)
1015 			jffs2_del_ino_cache(c, f->inocache);
1016 	}
1017 
1018 	up(&f->sem);
1019 }
1020