xref: /linux/fs/jffs2/gc.c (revision c537b994505099b7197e7d3125b942ecbcc51eb6)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: gc.c,v 1.155 2005/11/07 11:14:39 gleixner Exp $
11  *
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
21 #include "nodelist.h"
22 #include "compr.h"
23 
/*
 * Forward declarations for the file-local garbage collection helpers.
 * jffs2_garbage_collect_pass() and jffs2_garbage_collect_live() dispatch
 * to these depending on what kind of node is being moved.
 */

/* Copy a REF_PRISTINE node verbatim; 'ic' may be NULL for inode-less nodes. */
static int jffs2_garbage_collect_pristine(
	struct jffs2_sb_info *c,
	struct jffs2_inode_cache *ic,
	struct jffs2_raw_node_ref *raw);

/* Rewrite the metadata node of an inode. */
static int jffs2_garbage_collect_metadata(
	struct jffs2_sb_info *c,
	struct jffs2_eraseblock *jeb,
	struct jffs2_inode_info *f,
	struct jffs2_full_dnode *fd);

/* Rewrite a directory entry. */
static int jffs2_garbage_collect_dirent(
	struct jffs2_sb_info *c,
	struct jffs2_eraseblock *jeb,
	struct jffs2_inode_info *f,
	struct jffs2_full_dirent *fd);

/* Drop, or rewrite if still required, a deletion directory entry. */
static int jffs2_garbage_collect_deletion_dirent(
	struct jffs2_sb_info *c,
	struct jffs2_eraseblock *jeb,
	struct jffs2_inode_info *f,
	struct jffs2_full_dirent *fd);

/* Collect a data node whose range [start, end) crosses a page boundary. */
static int jffs2_garbage_collect_hole(
	struct jffs2_sb_info *c,
	struct jffs2_eraseblock *jeb,
	struct jffs2_inode_info *f,
	struct jffs2_full_dnode *fn,
	uint32_t start,
	uint32_t end);

/* Collect a data node whose range [start, end) lies within one page. */
static int jffs2_garbage_collect_dnode(
	struct jffs2_sb_info *c,
	struct jffs2_eraseblock *jeb,
	struct jffs2_inode_info *f,
	struct jffs2_full_dnode *fn,
	uint32_t start,
	uint32_t end);

/* Collect a node belonging to an inode that has been brought in-core. */
static int jffs2_garbage_collect_live(
	struct jffs2_sb_info *c,
	struct jffs2_eraseblock *jeb,
	struct jffs2_raw_node_ref *raw,
	struct jffs2_inode_info *f);
41 
42 /* Called with erase_completion_lock held */
43 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44 {
45 	struct jffs2_eraseblock *ret;
46 	struct list_head *nextlist = NULL;
47 	int n = jiffies % 128;
48 
49 	/* Pick an eraseblock to garbage collect next. This is where we'll
50 	   put the clever wear-levelling algorithms. Eventually.  */
51 	/* We possibly want to favour the dirtier blocks more when the
52 	   number of free blocks is low. */
53 again:
54 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55 		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56 		nextlist = &c->bad_used_list;
57 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
58 		/* Note that most of them will have gone directly to be erased.
59 		   So don't favour the erasable_list _too_ much. */
60 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61 		nextlist = &c->erasable_list;
62 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63 		/* Most of the time, pick one off the very_dirty list */
64 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65 		nextlist = &c->very_dirty_list;
66 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
67 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68 		nextlist = &c->dirty_list;
69 	} else if (!list_empty(&c->clean_list)) {
70 		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71 		nextlist = &c->clean_list;
72 	} else if (!list_empty(&c->dirty_list)) {
73 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74 
75 		nextlist = &c->dirty_list;
76 	} else if (!list_empty(&c->very_dirty_list)) {
77 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78 		nextlist = &c->very_dirty_list;
79 	} else if (!list_empty(&c->erasable_list)) {
80 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81 
82 		nextlist = &c->erasable_list;
83 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84 		/* There are blocks are wating for the wbuf sync */
85 		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
86 		spin_unlock(&c->erase_completion_lock);
87 		jffs2_flush_wbuf_pad(c);
88 		spin_lock(&c->erase_completion_lock);
89 		goto again;
90 	} else {
91 		/* Eep. All were empty */
92 		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93 		return NULL;
94 	}
95 
96 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97 	list_del(&ret->list);
98 	c->gcblock = ret;
99 	ret->gc_node = ret->first_node;
100 	if (!ret->gc_node) {
101 		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102 		BUG();
103 	}
104 
105 	/* Have we accidentally picked a clean block with wasted space ? */
106 	if (ret->wasted_size) {
107 		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
108 		ret->dirty_size += ret->wasted_size;
109 		c->wasted_size -= ret->wasted_size;
110 		c->dirty_size += ret->wasted_size;
111 		ret->wasted_size = 0;
112 	}
113 
114 	return ret;
115 }
116 
117 /* jffs2_garbage_collect_pass
118  * Make a single attempt to progress GC. Move one node, and possibly
119  * start erasing one eraseblock.
120  */
121 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122 {
123 	struct jffs2_inode_info *f;
124 	struct jffs2_inode_cache *ic;
125 	struct jffs2_eraseblock *jeb;
126 	struct jffs2_raw_node_ref *raw;
127 	int ret = 0, inum, nlink;
128 	int xattr = 0;
129 
130 	if (down_interruptible(&c->alloc_sem))
131 		return -EINTR;
132 
133 	for (;;) {
134 		spin_lock(&c->erase_completion_lock);
135 		if (!c->unchecked_size)
136 			break;
137 
138 		/* We can't start doing GC yet. We haven't finished checking
139 		   the node CRCs etc. Do it now. */
140 
141 		/* checked_ino is protected by the alloc_sem */
142 		if (c->checked_ino > c->highest_ino && xattr) {
143 			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144 			       c->unchecked_size);
145 			jffs2_dbg_dump_block_lists_nolock(c);
146 			spin_unlock(&c->erase_completion_lock);
147 			BUG();
148 		}
149 
150 		spin_unlock(&c->erase_completion_lock);
151 
152 		if (!xattr)
153 			xattr = jffs2_verify_xattr(c);
154 
155 		spin_lock(&c->inocache_lock);
156 
157 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
158 
159 		if (!ic) {
160 			spin_unlock(&c->inocache_lock);
161 			continue;
162 		}
163 
164 		if (!ic->nlink) {
165 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166 				  ic->ino));
167 			spin_unlock(&c->inocache_lock);
168 			jffs2_xattr_delete_inode(c, ic);
169 			continue;
170 		}
171 		switch(ic->state) {
172 		case INO_STATE_CHECKEDABSENT:
173 		case INO_STATE_PRESENT:
174 			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
175 			spin_unlock(&c->inocache_lock);
176 			continue;
177 
178 		case INO_STATE_GC:
179 		case INO_STATE_CHECKING:
180 			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
181 			spin_unlock(&c->inocache_lock);
182 			BUG();
183 
184 		case INO_STATE_READING:
185 			/* We need to wait for it to finish, lest we move on
186 			   and trigger the BUG() above while we haven't yet
187 			   finished checking all its nodes */
188 			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
189 			/* We need to come back again for the _same_ inode. We've
190 			 made no progress in this case, but that should be OK */
191 			c->checked_ino--;
192 
193 			up(&c->alloc_sem);
194 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
195 			return 0;
196 
197 		default:
198 			BUG();
199 
200 		case INO_STATE_UNCHECKED:
201 			;
202 		}
203 		ic->state = INO_STATE_CHECKING;
204 		spin_unlock(&c->inocache_lock);
205 
206 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
207 
208 		ret = jffs2_do_crccheck_inode(c, ic);
209 		if (ret)
210 			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
211 
212 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
213 		up(&c->alloc_sem);
214 		return ret;
215 	}
216 
217 	/* First, work out which block we're garbage-collecting */
218 	jeb = c->gcblock;
219 
220 	if (!jeb)
221 		jeb = jffs2_find_gc_block(c);
222 
223 	if (!jeb) {
224 		D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
225 		spin_unlock(&c->erase_completion_lock);
226 		up(&c->alloc_sem);
227 		return -EIO;
228 	}
229 
230 	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
231 	D1(if (c->nextblock)
232 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
233 
234 	if (!jeb->used_size) {
235 		up(&c->alloc_sem);
236 		goto eraseit;
237 	}
238 
239 	raw = jeb->gc_node;
240 
241 	while(ref_obsolete(raw)) {
242 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
243 		raw = ref_next(raw);
244 		if (unlikely(!raw)) {
245 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
246 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
247 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
248 			jeb->gc_node = raw;
249 			spin_unlock(&c->erase_completion_lock);
250 			up(&c->alloc_sem);
251 			BUG();
252 		}
253 	}
254 	jeb->gc_node = raw;
255 
256 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
257 
258 	if (!raw->next_in_ino) {
259 		/* Inode-less node. Clean marker, snapshot or something like that */
260 		spin_unlock(&c->erase_completion_lock);
261 		if (ref_flags(raw) == REF_PRISTINE) {
262 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
263 			jffs2_garbage_collect_pristine(c, NULL, raw);
264 		} else {
265 			/* Just mark it obsolete */
266 			jffs2_mark_node_obsolete(c, raw);
267 		}
268 		up(&c->alloc_sem);
269 		goto eraseit_lock;
270 	}
271 
272 	ic = jffs2_raw_ref_to_ic(raw);
273 
274 #ifdef CONFIG_JFFS2_FS_XATTR
275 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
276 	 * We can decide whether this node is inode or xattr by ic->class.     */
277 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
278 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
279 		spin_unlock(&c->erase_completion_lock);
280 
281 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
282 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
283 		} else {
284 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
285 		}
286 		goto release_sem;
287 	}
288 #endif
289 
290 	/* We need to hold the inocache. Either the erase_completion_lock or
291 	   the inocache_lock are sufficient; we trade down since the inocache_lock
292 	   causes less contention. */
293 	spin_lock(&c->inocache_lock);
294 
295 	spin_unlock(&c->erase_completion_lock);
296 
297 	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
298 
299 	/* Three possibilities:
300 	   1. Inode is already in-core. We must iget it and do proper
301 	      updating to its fragtree, etc.
302 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
303 	      inocache to prevent a read_inode(), copy the node intact.
304 	   3. Inode is not in-core, node is not pristine. We must iget()
305 	      and take the slow path.
306 	*/
307 
308 	switch(ic->state) {
309 	case INO_STATE_CHECKEDABSENT:
310 		/* It's been checked, but it's not currently in-core.
311 		   We can just copy any pristine nodes, but have
312 		   to prevent anyone else from doing read_inode() while
313 		   we're at it, so we set the state accordingly */
314 		if (ref_flags(raw) == REF_PRISTINE)
315 			ic->state = INO_STATE_GC;
316 		else {
317 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
318 				  ic->ino));
319 		}
320 		break;
321 
322 	case INO_STATE_PRESENT:
323 		/* It's in-core. GC must iget() it. */
324 		break;
325 
326 	case INO_STATE_UNCHECKED:
327 	case INO_STATE_CHECKING:
328 	case INO_STATE_GC:
329 		/* Should never happen. We should have finished checking
330 		   by the time we actually start doing any GC, and since
331 		   we're holding the alloc_sem, no other garbage collection
332 		   can happen.
333 		*/
334 		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
335 		       ic->ino, ic->state);
336 		up(&c->alloc_sem);
337 		spin_unlock(&c->inocache_lock);
338 		BUG();
339 
340 	case INO_STATE_READING:
341 		/* Someone's currently trying to read it. We must wait for
342 		   them to finish and then go through the full iget() route
343 		   to do the GC. However, sometimes read_inode() needs to get
344 		   the alloc_sem() (for marking nodes invalid) so we must
345 		   drop the alloc_sem before sleeping. */
346 
347 		up(&c->alloc_sem);
348 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
349 			  ic->ino, ic->state));
350 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
351 		/* And because we dropped the alloc_sem we must start again from the
352 		   beginning. Ponder chance of livelock here -- we're returning success
353 		   without actually making any progress.
354 
355 		   Q: What are the chances that the inode is back in INO_STATE_READING
356 		   again by the time we next enter this function? And that this happens
357 		   enough times to cause a real delay?
358 
359 		   A: Small enough that I don't care :)
360 		*/
361 		return 0;
362 	}
363 
364 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
365 	   node intact, and we don't have to muck about with the fragtree etc.
366 	   because we know it's not in-core. If it _was_ in-core, we go through
367 	   all the iget() crap anyway */
368 
369 	if (ic->state == INO_STATE_GC) {
370 		spin_unlock(&c->inocache_lock);
371 
372 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
373 
374 		spin_lock(&c->inocache_lock);
375 		ic->state = INO_STATE_CHECKEDABSENT;
376 		wake_up(&c->inocache_wq);
377 
378 		if (ret != -EBADFD) {
379 			spin_unlock(&c->inocache_lock);
380 			goto release_sem;
381 		}
382 
383 		/* Fall through if it wanted us to, with inocache_lock held */
384 	}
385 
386 	/* Prevent the fairly unlikely race where the gcblock is
387 	   entirely obsoleted by the final close of a file which had
388 	   the only valid nodes in the block, followed by erasure,
389 	   followed by freeing of the ic because the erased block(s)
390 	   held _all_ the nodes of that inode.... never been seen but
391 	   it's vaguely possible. */
392 
393 	inum = ic->ino;
394 	nlink = ic->nlink;
395 	spin_unlock(&c->inocache_lock);
396 
397 	f = jffs2_gc_fetch_inode(c, inum, nlink);
398 	if (IS_ERR(f)) {
399 		ret = PTR_ERR(f);
400 		goto release_sem;
401 	}
402 	if (!f) {
403 		ret = 0;
404 		goto release_sem;
405 	}
406 
407 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
408 
409 	jffs2_gc_release_inode(c, f);
410 
411  release_sem:
412 	up(&c->alloc_sem);
413 
414  eraseit_lock:
415 	/* If we've finished this block, start it erasing */
416 	spin_lock(&c->erase_completion_lock);
417 
418  eraseit:
419 	if (c->gcblock && !c->gcblock->used_size) {
420 		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
421 		/* We're GC'ing an empty block? */
422 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
423 		c->gcblock = NULL;
424 		c->nr_erasing_blocks++;
425 		jffs2_erase_pending_trigger(c);
426 	}
427 	spin_unlock(&c->erase_completion_lock);
428 
429 	return ret;
430 }
431 
432 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
433 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
434 {
435 	struct jffs2_node_frag *frag;
436 	struct jffs2_full_dnode *fn = NULL;
437 	struct jffs2_full_dirent *fd;
438 	uint32_t start = 0, end = 0, nrfrags = 0;
439 	int ret = 0;
440 
441 	down(&f->sem);
442 
443 	/* Now we have the lock for this inode. Check that it's still the one at the head
444 	   of the list. */
445 
446 	spin_lock(&c->erase_completion_lock);
447 
448 	if (c->gcblock != jeb) {
449 		spin_unlock(&c->erase_completion_lock);
450 		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
451 		goto upnout;
452 	}
453 	if (ref_obsolete(raw)) {
454 		spin_unlock(&c->erase_completion_lock);
455 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
456 		/* They'll call again */
457 		goto upnout;
458 	}
459 	spin_unlock(&c->erase_completion_lock);
460 
461 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
462 	if (f->metadata && f->metadata->raw == raw) {
463 		fn = f->metadata;
464 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
465 		goto upnout;
466 	}
467 
468 	/* FIXME. Read node and do lookup? */
469 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
470 		if (frag->node && frag->node->raw == raw) {
471 			fn = frag->node;
472 			end = frag->ofs + frag->size;
473 			if (!nrfrags++)
474 				start = frag->ofs;
475 			if (nrfrags == frag->node->frags)
476 				break; /* We've found them all */
477 		}
478 	}
479 	if (fn) {
480 		if (ref_flags(raw) == REF_PRISTINE) {
481 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
482 			if (!ret) {
483 				/* Urgh. Return it sensibly. */
484 				frag->node->raw = f->inocache->nodes;
485 			}
486 			if (ret != -EBADFD)
487 				goto upnout;
488 		}
489 		/* We found a datanode. Do the GC */
490 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
491 			/* It crosses a page boundary. Therefore, it must be a hole. */
492 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
493 		} else {
494 			/* It could still be a hole. But we GC the page this way anyway */
495 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
496 		}
497 		goto upnout;
498 	}
499 
500 	/* Wasn't a dnode. Try dirent */
501 	for (fd = f->dents; fd; fd=fd->next) {
502 		if (fd->raw == raw)
503 			break;
504 	}
505 
506 	if (fd && fd->ino) {
507 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
508 	} else if (fd) {
509 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
510 	} else {
511 		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
512 		       ref_offset(raw), f->inocache->ino);
513 		if (ref_obsolete(raw)) {
514 			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
515 		} else {
516 			jffs2_dbg_dump_node(c, ref_offset(raw));
517 			BUG();
518 		}
519 	}
520  upnout:
521 	up(&f->sem);
522 
523 	return ret;
524 }
525 
526 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
527 					  struct jffs2_inode_cache *ic,
528 					  struct jffs2_raw_node_ref *raw)
529 {
530 	union jffs2_node_union *node;
531 	size_t retlen;
532 	int ret;
533 	uint32_t phys_ofs, alloclen;
534 	uint32_t crc, rawlen;
535 	int retried = 0;
536 
537 	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
538 
539 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
540 
541 	/* Ask for a small amount of space (or the totlen if smaller) because we
542 	   don't want to force wastage of the end of a block if splitting would
543 	   work. */
544 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
545 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
546 
547 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
548 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
549 
550 	if (ret)
551 		return ret;
552 
553 	if (alloclen < rawlen) {
554 		/* Doesn't fit untouched. We'll go the old route and split it */
555 		return -EBADFD;
556 	}
557 
558 	node = kmalloc(rawlen, GFP_KERNEL);
559 	if (!node)
560                return -ENOMEM;
561 
562 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
563 	if (!ret && retlen != rawlen)
564 		ret = -EIO;
565 	if (ret)
566 		goto out_node;
567 
568 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
569 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
570 		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
571 		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
572 		goto bail;
573 	}
574 
575 	switch(je16_to_cpu(node->u.nodetype)) {
576 	case JFFS2_NODETYPE_INODE:
577 		crc = crc32(0, node, sizeof(node->i)-8);
578 		if (je32_to_cpu(node->i.node_crc) != crc) {
579 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
580 			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
581 			goto bail;
582 		}
583 
584 		if (je32_to_cpu(node->i.dsize)) {
585 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
586 			if (je32_to_cpu(node->i.data_crc) != crc) {
587 				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
588 				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
589 				goto bail;
590 			}
591 		}
592 		break;
593 
594 	case JFFS2_NODETYPE_DIRENT:
595 		crc = crc32(0, node, sizeof(node->d)-8);
596 		if (je32_to_cpu(node->d.node_crc) != crc) {
597 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
598 			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
599 			goto bail;
600 		}
601 
602 		if (node->d.nsize) {
603 			crc = crc32(0, node->d.name, node->d.nsize);
604 			if (je32_to_cpu(node->d.name_crc) != crc) {
605 				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
606 				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
607 				goto bail;
608 			}
609 		}
610 		break;
611 	default:
612 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
613 		if (ic) {
614 			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
615 			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
616 			goto bail;
617 		}
618 	}
619 
620 	/* OK, all the CRCs are good; this node can just be copied as-is. */
621  retry:
622 	phys_ofs = write_ofs(c);
623 
624 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
625 
626 	if (ret || (retlen != rawlen)) {
627 		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
628                        rawlen, phys_ofs, ret, retlen);
629 		if (retlen) {
630 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
631 		} else {
632 			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
633 		}
634 		if (!retried) {
635 			/* Try to reallocate space and retry */
636 			uint32_t dummy;
637 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
638 
639 			retried = 1;
640 
641 			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
642 
643 			jffs2_dbg_acct_sanity_check(c,jeb);
644 			jffs2_dbg_acct_paranoia_check(c, jeb);
645 
646 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
647 						/* this is not the exact summary size of it,
648 							it is only an upper estimation */
649 
650 			if (!ret) {
651 				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
652 
653 				jffs2_dbg_acct_sanity_check(c,jeb);
654 				jffs2_dbg_acct_paranoia_check(c, jeb);
655 
656 				goto retry;
657 			}
658 			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
659 		}
660 
661 		if (!ret)
662 			ret = -EIO;
663 		goto out_node;
664 	}
665 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
666 
667 	jffs2_mark_node_obsolete(c, raw);
668 	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
669 
670  out_node:
671 	kfree(node);
672 	return ret;
673  bail:
674 	ret = -EBADFD;
675 	goto out_node;
676 }
677 
678 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
679 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
680 {
681 	struct jffs2_full_dnode *new_fn;
682 	struct jffs2_raw_inode ri;
683 	struct jffs2_node_frag *last_frag;
684 	union jffs2_device_node dev;
685 	char *mdata = NULL, mdatalen = 0;
686 	uint32_t alloclen, ilen;
687 	int ret;
688 
689 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
690 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
691 		/* For these, we don't actually need to read the old node */
692 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
693 		mdata = (char *)&dev;
694 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
695 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
696 		mdatalen = fn->size;
697 		mdata = kmalloc(fn->size, GFP_KERNEL);
698 		if (!mdata) {
699 			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
700 			return -ENOMEM;
701 		}
702 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
703 		if (ret) {
704 			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
705 			kfree(mdata);
706 			return ret;
707 		}
708 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
709 
710 	}
711 
712 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
713 				JFFS2_SUMMARY_INODE_SIZE);
714 	if (ret) {
715 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
716 		       sizeof(ri)+ mdatalen, ret);
717 		goto out;
718 	}
719 
720 	last_frag = frag_last(&f->fragtree);
721 	if (last_frag)
722 		/* Fetch the inode length from the fragtree rather then
723 		 * from i_size since i_size may have not been updated yet */
724 		ilen = last_frag->ofs + last_frag->size;
725 	else
726 		ilen = JFFS2_F_I_SIZE(f);
727 
728 	memset(&ri, 0, sizeof(ri));
729 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
730 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
731 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
732 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
733 
734 	ri.ino = cpu_to_je32(f->inocache->ino);
735 	ri.version = cpu_to_je32(++f->highest_version);
736 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
737 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
738 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
739 	ri.isize = cpu_to_je32(ilen);
740 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
741 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
742 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
743 	ri.offset = cpu_to_je32(0);
744 	ri.csize = cpu_to_je32(mdatalen);
745 	ri.dsize = cpu_to_je32(mdatalen);
746 	ri.compr = JFFS2_COMPR_NONE;
747 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
748 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
749 
750 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
751 
752 	if (IS_ERR(new_fn)) {
753 		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
754 		ret = PTR_ERR(new_fn);
755 		goto out;
756 	}
757 	jffs2_mark_node_obsolete(c, fn->raw);
758 	jffs2_free_full_dnode(fn);
759 	f->metadata = new_fn;
760  out:
761 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
762 		kfree(mdata);
763 	return ret;
764 }
765 
766 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
767 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
768 {
769 	struct jffs2_full_dirent *new_fd;
770 	struct jffs2_raw_dirent rd;
771 	uint32_t alloclen;
772 	int ret;
773 
774 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
775 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
776 	rd.nsize = strlen(fd->name);
777 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
778 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
779 
780 	rd.pino = cpu_to_je32(f->inocache->ino);
781 	rd.version = cpu_to_je32(++f->highest_version);
782 	rd.ino = cpu_to_je32(fd->ino);
783 	/* If the times on this inode were set by explicit utime() they can be different,
784 	   so refrain from splatting them. */
785 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
786 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
787 	else
788 		rd.mctime = cpu_to_je32(0);
789 	rd.type = fd->type;
790 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
791 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
792 
793 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
794 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
795 	if (ret) {
796 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
797 		       sizeof(rd)+rd.nsize, ret);
798 		return ret;
799 	}
800 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
801 
802 	if (IS_ERR(new_fd)) {
803 		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
804 		return PTR_ERR(new_fd);
805 	}
806 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
807 	return 0;
808 }
809 
810 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
811 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
812 {
813 	struct jffs2_full_dirent **fdp = &f->dents;
814 	int found = 0;
815 
816 	/* On a medium where we can't actually mark nodes obsolete
817 	   pernamently, such as NAND flash, we need to work out
818 	   whether this deletion dirent is still needed to actively
819 	   delete a 'real' dirent with the same name that's still
820 	   somewhere else on the flash. */
821 	if (!jffs2_can_mark_obsolete(c)) {
822 		struct jffs2_raw_dirent *rd;
823 		struct jffs2_raw_node_ref *raw;
824 		int ret;
825 		size_t retlen;
826 		int name_len = strlen(fd->name);
827 		uint32_t name_crc = crc32(0, fd->name, name_len);
828 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
829 
830 		rd = kmalloc(rawlen, GFP_KERNEL);
831 		if (!rd)
832 			return -ENOMEM;
833 
834 		/* Prevent the erase code from nicking the obsolete node refs while
835 		   we're looking at them. I really don't like this extra lock but
836 		   can't see any alternative. Suggestions on a postcard to... */
837 		down(&c->erase_free_sem);
838 
839 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
840 
841 			cond_resched();
842 
843 			/* We only care about obsolete ones */
844 			if (!(ref_obsolete(raw)))
845 				continue;
846 
847 			/* Any dirent with the same name is going to have the same length... */
848 			if (ref_totlen(c, NULL, raw) != rawlen)
849 				continue;
850 
851 			/* Doesn't matter if there's one in the same erase block. We're going to
852 			   delete it too at the same time. */
853 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
854 				continue;
855 
856 			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
857 
858 			/* This is an obsolete node belonging to the same directory, and it's of the right
859 			   length. We need to take a closer look...*/
860 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
861 			if (ret) {
862 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
863 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
864 				continue;
865 			}
866 			if (retlen != rawlen) {
867 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
868 				       retlen, rawlen, ref_offset(raw));
869 				continue;
870 			}
871 
872 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
873 				continue;
874 
875 			/* If the name CRC doesn't match, skip */
876 			if (je32_to_cpu(rd->name_crc) != name_crc)
877 				continue;
878 
879 			/* If the name length doesn't match, or it's another deletion dirent, skip */
880 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
881 				continue;
882 
883 			/* OK, check the actual name now */
884 			if (memcmp(rd->name, fd->name, name_len))
885 				continue;
886 
887 			/* OK. The name really does match. There really is still an older node on
888 			   the flash which our deletion dirent obsoletes. So we have to write out
889 			   a new deletion dirent to replace it */
890 			up(&c->erase_free_sem);
891 
892 			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
893 				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
894 			kfree(rd);
895 
896 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
897 		}
898 
899 		up(&c->erase_free_sem);
900 		kfree(rd);
901 	}
902 
903 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
904 	   we should update the metadata node with those times accordingly */
905 
906 	/* No need for it any more. Just mark it obsolete and remove it from the list */
907 	while (*fdp) {
908 		if ((*fdp) == fd) {
909 			found = 1;
910 			*fdp = fd->next;
911 			break;
912 		}
913 		fdp = &(*fdp)->next;
914 	}
915 	if (!found) {
916 		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
917 	}
918 	jffs2_mark_node_obsolete(c, fd->raw);
919 	jffs2_free_full_dirent(fd);
920 	return 0;
921 }
922 
923 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
924 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
925 				      uint32_t start, uint32_t end)
926 {
927 	struct jffs2_raw_inode ri;
928 	struct jffs2_node_frag *frag;
929 	struct jffs2_full_dnode *new_fn;
930 	uint32_t alloclen, ilen;
931 	int ret;
932 
933 	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
934 		  f->inocache->ino, start, end));
935 
936 	memset(&ri, 0, sizeof(ri));
937 
938 	if(fn->frags > 1) {
939 		size_t readlen;
940 		uint32_t crc;
941 		/* It's partially obsoleted by a later write. So we have to
942 		   write it out again with the _same_ version as before */
943 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
944 		if (readlen != sizeof(ri) || ret) {
945 			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
946 			goto fill;
947 		}
948 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
949 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
950 			       ref_offset(fn->raw),
951 			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
952 			return -EIO;
953 		}
954 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
955 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
956 			       ref_offset(fn->raw),
957 			       je32_to_cpu(ri.totlen), sizeof(ri));
958 			return -EIO;
959 		}
960 		crc = crc32(0, &ri, sizeof(ri)-8);
961 		if (crc != je32_to_cpu(ri.node_crc)) {
962 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
963 			       ref_offset(fn->raw),
964 			       je32_to_cpu(ri.node_crc), crc);
965 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
966 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
967 			       start, end, f->inocache->ino);
968 			goto fill;
969 		}
970 		if (ri.compr != JFFS2_COMPR_ZERO) {
971 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
972 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
973 			       start, end, f->inocache->ino);
974 			goto fill;
975 		}
976 	} else {
977 	fill:
978 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
979 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
980 		ri.totlen = cpu_to_je32(sizeof(ri));
981 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
982 
983 		ri.ino = cpu_to_je32(f->inocache->ino);
984 		ri.version = cpu_to_je32(++f->highest_version);
985 		ri.offset = cpu_to_je32(start);
986 		ri.dsize = cpu_to_je32(end - start);
987 		ri.csize = cpu_to_je32(0);
988 		ri.compr = JFFS2_COMPR_ZERO;
989 	}
990 
991 	frag = frag_last(&f->fragtree);
992 	if (frag)
993 		/* Fetch the inode length from the fragtree rather then
994 		 * from i_size since i_size may have not been updated yet */
995 		ilen = frag->ofs + frag->size;
996 	else
997 		ilen = JFFS2_F_I_SIZE(f);
998 
999 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1000 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1001 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1002 	ri.isize = cpu_to_je32(ilen);
1003 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1004 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1005 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1006 	ri.data_crc = cpu_to_je32(0);
1007 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1008 
1009 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1010 				     JFFS2_SUMMARY_INODE_SIZE);
1011 	if (ret) {
1012 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1013 		       sizeof(ri), ret);
1014 		return ret;
1015 	}
1016 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1017 
1018 	if (IS_ERR(new_fn)) {
1019 		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1020 		return PTR_ERR(new_fn);
1021 	}
1022 	if (je32_to_cpu(ri.version) == f->highest_version) {
1023 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1024 		if (f->metadata) {
1025 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1026 			jffs2_free_full_dnode(f->metadata);
1027 			f->metadata = NULL;
1028 		}
1029 		return 0;
1030 	}
1031 
1032 	/*
1033 	 * We should only get here in the case where the node we are
1034 	 * replacing had more than one frag, so we kept the same version
1035 	 * number as before. (Except in case of error -- see 'goto fill;'
1036 	 * above.)
1037 	 */
1038 	D1(if(unlikely(fn->frags <= 1)) {
1039 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1040 		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1041 		       je32_to_cpu(ri.ino));
1042 	});
1043 
1044 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1045 	mark_ref_normal(new_fn->raw);
1046 
1047 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1048 	     frag; frag = frag_next(frag)) {
1049 		if (frag->ofs > fn->size + fn->ofs)
1050 			break;
1051 		if (frag->node == fn) {
1052 			frag->node = new_fn;
1053 			new_fn->frags++;
1054 			fn->frags--;
1055 		}
1056 	}
1057 	if (fn->frags) {
1058 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1059 		BUG();
1060 	}
1061 	if (!new_fn->frags) {
1062 		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1063 		BUG();
1064 	}
1065 
1066 	jffs2_mark_node_obsolete(c, fn->raw);
1067 	jffs2_free_full_dnode(fn);
1068 
1069 	return 0;
1070 }
1071 
1072 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1073 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1074 				       uint32_t start, uint32_t end)
1075 {
1076 	struct jffs2_full_dnode *new_fn;
1077 	struct jffs2_raw_inode ri;
1078 	uint32_t alloclen, offset, orig_end, orig_start;
1079 	int ret = 0;
1080 	unsigned char *comprbuf = NULL, *writebuf;
1081 	unsigned long pg;
1082 	unsigned char *pg_ptr;
1083 
1084 	memset(&ri, 0, sizeof(ri));
1085 
1086 	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1087 		  f->inocache->ino, start, end));
1088 
1089 	orig_end = end;
1090 	orig_start = start;
1091 
1092 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1093 		/* Attempt to do some merging. But only expand to cover logically
1094 		   adjacent frags if the block containing them is already considered
1095 		   to be dirty. Otherwise we end up with GC just going round in
1096 		   circles dirtying the nodes it already wrote out, especially
1097 		   on NAND where we have small eraseblocks and hence a much higher
1098 		   chance of nodes having to be split to cross boundaries. */
1099 
1100 		struct jffs2_node_frag *frag;
1101 		uint32_t min, max;
1102 
1103 		min = start & ~(PAGE_CACHE_SIZE-1);
1104 		max = min + PAGE_CACHE_SIZE;
1105 
1106 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1107 
1108 		/* BUG_ON(!frag) but that'll happen anyway... */
1109 
1110 		BUG_ON(frag->ofs != start);
1111 
1112 		/* First grow down... */
1113 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1114 
1115 			/* If the previous frag doesn't even reach the beginning, there's
1116 			   excessive fragmentation. Just merge. */
1117 			if (frag->ofs > min) {
1118 				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1119 					  frag->ofs, frag->ofs+frag->size));
1120 				start = frag->ofs;
1121 				continue;
1122 			}
1123 			/* OK. This frag holds the first byte of the page. */
1124 			if (!frag->node || !frag->node->raw) {
1125 				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1126 					  frag->ofs, frag->ofs+frag->size));
1127 				break;
1128 			} else {
1129 
1130 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1131 				   in a block which is still considered clean? If so, don't obsolete it.
1132 				   If not, cover it anyway. */
1133 
1134 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1135 				struct jffs2_eraseblock *jeb;
1136 
1137 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1138 
1139 				if (jeb == c->gcblock) {
1140 					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1141 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1142 					start = frag->ofs;
1143 					break;
1144 				}
1145 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1146 					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1147 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1148 					break;
1149 				}
1150 
1151 				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1152 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1153 				start = frag->ofs;
1154 				break;
1155 			}
1156 		}
1157 
1158 		/* ... then up */
1159 
1160 		/* Find last frag which is actually part of the node we're to GC. */
1161 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1162 
1163 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1164 
1165 			/* If the previous frag doesn't even reach the beginning, there's lots
1166 			   of fragmentation. Just merge. */
1167 			if (frag->ofs+frag->size < max) {
1168 				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1169 					  frag->ofs, frag->ofs+frag->size));
1170 				end = frag->ofs + frag->size;
1171 				continue;
1172 			}
1173 
1174 			if (!frag->node || !frag->node->raw) {
1175 				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1176 					  frag->ofs, frag->ofs+frag->size));
1177 				break;
1178 			} else {
1179 
1180 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1181 				   in a block which is still considered clean? If so, don't obsolete it.
1182 				   If not, cover it anyway. */
1183 
1184 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1185 				struct jffs2_eraseblock *jeb;
1186 
1187 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1188 
1189 				if (jeb == c->gcblock) {
1190 					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1191 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1192 					end = frag->ofs + frag->size;
1193 					break;
1194 				}
1195 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1196 					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1197 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1198 					break;
1199 				}
1200 
1201 				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1202 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1203 				end = frag->ofs + frag->size;
1204 				break;
1205 			}
1206 		}
1207 		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1208 			  orig_start, orig_end, start, end));
1209 
1210 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1211 		BUG_ON(end < orig_end);
1212 		BUG_ON(start > orig_start);
1213 	}
1214 
1215 	/* First, use readpage() to read the appropriate page into the page cache */
1216 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1217 	 *    triggered garbage collection in the first place?
1218 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1219 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1220 	 *    suboptimal, but at least we're correct.
1221 	 */
1222 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1223 
1224 	if (IS_ERR(pg_ptr)) {
1225 		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1226 		return PTR_ERR(pg_ptr);
1227 	}
1228 
1229 	offset = start;
1230 	while(offset < orig_end) {
1231 		uint32_t datalen;
1232 		uint32_t cdatalen;
1233 		uint16_t comprtype = JFFS2_COMPR_NONE;
1234 
1235 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1236 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1237 
1238 		if (ret) {
1239 			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1240 			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1241 			break;
1242 		}
1243 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1244 		datalen = end - offset;
1245 
1246 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1247 
1248 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1249 
1250 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1251 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1252 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1253 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1254 
1255 		ri.ino = cpu_to_je32(f->inocache->ino);
1256 		ri.version = cpu_to_je32(++f->highest_version);
1257 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1258 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1259 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1260 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1261 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1262 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1263 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1264 		ri.offset = cpu_to_je32(offset);
1265 		ri.csize = cpu_to_je32(cdatalen);
1266 		ri.dsize = cpu_to_je32(datalen);
1267 		ri.compr = comprtype & 0xff;
1268 		ri.usercompr = (comprtype >> 8) & 0xff;
1269 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1270 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1271 
1272 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1273 
1274 		jffs2_free_comprbuf(comprbuf, writebuf);
1275 
1276 		if (IS_ERR(new_fn)) {
1277 			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1278 			ret = PTR_ERR(new_fn);
1279 			break;
1280 		}
1281 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1282 		offset += datalen;
1283 		if (f->metadata) {
1284 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1285 			jffs2_free_full_dnode(f->metadata);
1286 			f->metadata = NULL;
1287 		}
1288 	}
1289 
1290 	jffs2_gc_release_page(c, pg_ptr, &pg);
1291 	return ret;
1292 }
1293