xref: /linux/fs/jffs2/gc.c (revision f3d9478b2ce468c3115b02ecae7e975990697f15)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: gc.c,v 1.155 2005/11/07 11:14:39 gleixner Exp $
11  *
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
21 #include "nodelist.h"
22 #include "compr.h"
23 
24 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
25 					  struct jffs2_inode_cache *ic,
26 					  struct jffs2_raw_node_ref *raw);
27 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
29 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35 				      uint32_t start, uint32_t end);
36 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38 				       uint32_t start, uint32_t end);
39 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
40 			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
41 
42 /* Called with erase_completion_lock held.
 *
 * Chooses the eraseblock that garbage collection will work on next and
 * installs it as c->gcblock.
 *
 * The source list is chosen from n = jiffies % 128 (sampled once on entry)
 * and from which lists are non-empty, tested in this order:
 *   - bad_used_list, if c->nr_free_blocks > c->resv_blocks_gcbad
 *   - erasable_list    when n < 50
 *   - very_dirty_list  when n < 110
 *   - dirty_list       when n < 126
 *   - clean_list
 *   - then dirty_list, very_dirty_list and erasable_list as fallbacks
 * If only erasable_pending_wbuf_list has entries, the lock is dropped, the
 * write buffer is flushed with jffs2_flush_wbuf_pad(), the lock is retaken
 * and the selection is restarted.
 *
 * The first entry of the chosen list is unlinked, its gc_node is reset to
 * first_node, and any wasted_size it carries is converted to dirty_size in
 * both the block and the filesystem-wide totals.
 *
 * Returns the chosen block, or NULL if every list was empty.
 */
43 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44 {
45 	struct jffs2_eraseblock *ret;
46 	struct list_head *nextlist = NULL;
47 	int n = jiffies % 128;
48 
49 	/* Pick an eraseblock to garbage collect next. This is where we'll
50 	   put the clever wear-levelling algorithms. Eventually.  */
51 	/* We possibly want to favour the dirtier blocks more when the
52 	   number of free blocks is low. */
53 again:
54 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55 		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56 		nextlist = &c->bad_used_list;
57 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
58 		/* Note that most of them will have gone directly to be erased.
59 		   So don't favour the erasable_list _too_ much. */
60 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61 		nextlist = &c->erasable_list;
62 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63 		/* Most of the time, pick one off the very_dirty list */
64 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65 		nextlist = &c->very_dirty_list;
66 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
67 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68 		nextlist = &c->dirty_list;
69 	} else if (!list_empty(&c->clean_list)) {
70 		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71 		nextlist = &c->clean_list;
72 	} else if (!list_empty(&c->dirty_list)) {
73 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74 
75 		nextlist = &c->dirty_list;
76 	} else if (!list_empty(&c->very_dirty_list)) {
77 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78 		nextlist = &c->very_dirty_list;
79 	} else if (!list_empty(&c->erasable_list)) {
80 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81 
82 		nextlist = &c->erasable_list;
83 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84 		/* There are blocks waiting for the wbuf sync */
85 		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
		/* The flush is done without the spinlock; the lists may have
		   changed by the time it is retaken, hence the full restart. */
86 		spin_unlock(&c->erase_completion_lock);
87 		jffs2_flush_wbuf_pad(c);
88 		spin_lock(&c->erase_completion_lock);
89 		goto again;
90 	} else {
91 		/* Eep. All were empty */
92 		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93 		return NULL;
94 	}
95 
	/* Detach the head of the chosen list and make it the current GC block */
96 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97 	list_del(&ret->list);
98 	c->gcblock = ret;
99 	ret->gc_node = ret->first_node;
100 	if (!ret->gc_node) {
101 		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102 		BUG();
103 	}
104 
105 	/* Have we accidentally picked a clean block with wasted space ? */
106 	if (ret->wasted_size) {
107 		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
108 		ret->dirty_size += ret->wasted_size;
109 		c->wasted_size -= ret->wasted_size;
110 		c->dirty_size += ret->wasted_size;
111 		ret->wasted_size = 0;
112 	}
113 
114 	return ret;
115 }
116 
117 /* jffs2_garbage_collect_pass
118  * Make a single attempt to progress GC. Move one node, and possibly
119  * start erasing one eraseblock.
 *
 * Takes c->alloc_sem (interruptibly) for the duration of the step.
 *
 * While c->unchecked_size is non-zero no node is moved; instead the inode
 * numbered c->checked_ino is CRC-checked (skipping inodes that are absent,
 * have nlink zero or are already checked) and the function returns.
 *
 * Otherwise the current GC block (c->gcblock, or a freshly chosen one) is
 * used, obsolete refs at jeb->gc_node are skipped, and the first live node
 * is handled according to its owner:
 *   - no owner (next_in_ino NULL): copied intact if REF_PRISTINE, else
 *     marked obsolete;
 *   - xattr datum/ref (CONFIG_JFFS2_FS_XATTR): handed to the xattr GC code;
 *   - inode in INO_STATE_CHECKEDABSENT with a REF_PRISTINE node: copied
 *     intact under INO_STATE_GC;
 *   - anything else: the inode is fetched and jffs2_garbage_collect_live()
 *     does the work.
 * Finally, if the GC block has no used space left it is queued on
 * erase_pending_list.
 *
 * Returns -EINTR if interrupted while waiting for alloc_sem, -EIO if no
 * block could be found, 0 when it had to wait for an inode in
 * INO_STATE_READING (no progress made), otherwise the result of the step.
120  */
121 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122 {
123 	struct jffs2_inode_info *f;
124 	struct jffs2_inode_cache *ic;
125 	struct jffs2_eraseblock *jeb;
126 	struct jffs2_raw_node_ref *raw;
127 	int ret = 0, inum, nlink;
128 	int xattr = 0;
129 
130 	if (down_interruptible(&c->alloc_sem))
131 		return -EINTR;
132 
	/* Checking phase. The loop is left (with erase_completion_lock held)
	   only once unchecked_size has reached zero; every other path either
	   continues to the next inode number or returns. */
133 	for (;;) {
134 		spin_lock(&c->erase_completion_lock);
135 		if (!c->unchecked_size)
136 			break;
137 
138 		/* We can't start doing GC yet. We haven't finished checking
139 		   the node CRCs etc. Do it now. */
140 
141 		/* checked_ino is protected by the alloc_sem */
142 		if (c->checked_ino > c->highest_ino && xattr) {
143 			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144 			       c->unchecked_size);
145 			jffs2_dbg_dump_block_lists_nolock(c);
146 			spin_unlock(&c->erase_completion_lock);
147 			BUG();
148 		}
149 
150 		spin_unlock(&c->erase_completion_lock);
151 
		/* Run the xattr verification until it reports non-zero; the
		   BUG() above is only armed once that has happened. */
152 		if (!xattr)
153 			xattr = jffs2_verify_xattr(c);
154 
155 		spin_lock(&c->inocache_lock);
156 
157 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
158 
159 		if (!ic) {
160 			spin_unlock(&c->inocache_lock);
161 			continue;
162 		}
163 
164 		if (!ic->nlink) {
165 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166 				  ic->ino));
167 			spin_unlock(&c->inocache_lock);
168 			continue;
169 		}
170 		switch(ic->state) {
171 		case INO_STATE_CHECKEDABSENT:
172 		case INO_STATE_PRESENT:
173 			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
174 			spin_unlock(&c->inocache_lock);
175 			continue;
176 
177 		case INO_STATE_GC:
178 		case INO_STATE_CHECKING:
179 			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
180 			spin_unlock(&c->inocache_lock);
181 			BUG();
182 
183 		case INO_STATE_READING:
184 			/* We need to wait for it to finish, lest we move on
185 			   and trigger the BUG() above while we haven't yet
186 			   finished checking all its nodes */
187 			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
188 			/* We need to come back again for the _same_ inode. We've
189 			 made no progress in this case, but that should be OK */
190 			c->checked_ino--;
191 
192 			up(&c->alloc_sem);
193 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
194 			return 0;
195 
196 		default:
197 			BUG();
198 
199 		case INO_STATE_UNCHECKED:
200 			;
201 		}
		/* Only INO_STATE_UNCHECKED reaches this point */
202 		ic->state = INO_STATE_CHECKING;
203 		spin_unlock(&c->inocache_lock);
204 
205 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
206 
207 		ret = jffs2_do_crccheck_inode(c, ic);
208 		if (ret)
209 			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
210 
		/* The inode is marked checked even if the CRC check failed;
		   the error is still passed back to the caller. */
211 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
212 		up(&c->alloc_sem);
213 		return ret;
214 	}
215 
216 	/* First, work out which block we're garbage-collecting */
217 	jeb = c->gcblock;
218 
219 	if (!jeb)
220 		jeb = jffs2_find_gc_block(c);
221 
222 	if (!jeb) {
223 		D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
224 		spin_unlock(&c->erase_completion_lock);
225 		up(&c->alloc_sem);
226 		return -EIO;
227 	}
228 
229 	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
230 	D1(if (c->nextblock)
231 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
232 
	/* Nothing live in this block: go straight to queuing it for erase.
	   erase_completion_lock is still held, which is what the 'eraseit'
	   label expects. */
233 	if (!jeb->used_size) {
234 		up(&c->alloc_sem);
235 		goto eraseit;
236 	}
237 
238 	raw = jeb->gc_node;
239 
	/* Advance past obsolete refs to the first live node in the block */
240 	while(ref_obsolete(raw)) {
241 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
242 		raw = ref_next(raw);
243 		if (unlikely(!raw)) {
244 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
245 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
246 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
247 			jeb->gc_node = raw;
248 			spin_unlock(&c->erase_completion_lock);
249 			up(&c->alloc_sem);
250 			BUG();
251 		}
252 	}
253 	jeb->gc_node = raw;
254 
255 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
256 
257 	if (!raw->next_in_ino) {
258 		/* Inode-less node. Clean marker, snapshot or something like that */
259 		spin_unlock(&c->erase_completion_lock);
260 		if (ref_flags(raw) == REF_PRISTINE) {
261 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
			/* NOTE(review): the return value is discarded here, so
			   this path always returns the initial ret of 0. */
262 			jffs2_garbage_collect_pristine(c, NULL, raw);
263 		} else {
264 			/* Just mark it obsolete */
265 			jffs2_mark_node_obsolete(c, raw);
266 		}
267 		up(&c->alloc_sem);
268 		goto eraseit_lock;
269 	}
270 
271 	ic = jffs2_raw_ref_to_ic(raw);
272 
273 #ifdef CONFIG_JFFS2_FS_XATTR
274 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
275 	 * We can decide whether this node is inode or xattr by ic->class.     */
276 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
277 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
278 		BUG_ON(raw->next_in_ino != (void *)ic);
279 		spin_unlock(&c->erase_completion_lock);
280 
281 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
282 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
283 		} else {
284 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
285 		}
286 		goto release_sem;
287 	}
288 #endif
289 
290 	/* We need to hold the inocache. Either the erase_completion_lock or
291 	   the inocache_lock are sufficient; we trade down since the inocache_lock
292 	   causes less contention. */
293 	spin_lock(&c->inocache_lock);
294 
295 	spin_unlock(&c->erase_completion_lock);
296 
297 	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
298 
299 	/* Three possibilities:
300 	   1. Inode is already in-core. We must iget it and do proper
301 	      updating to its fragtree, etc.
302 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
303 	      inocache to prevent a read_inode(), copy the node intact.
304 	   3. Inode is not in-core, node is not pristine. We must iget()
305 	      and take the slow path.
306 	*/
307 
308 	switch(ic->state) {
309 	case INO_STATE_CHECKEDABSENT:
310 		/* It's been checked, but it's not currently in-core.
311 		   We can just copy any pristine nodes, but have
312 		   to prevent anyone else from doing read_inode() while
313 		   we're at it, so we set the state accordingly */
314 		if (ref_flags(raw) == REF_PRISTINE)
315 			ic->state = INO_STATE_GC;
316 		else {
317 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
318 				  ic->ino));
319 		}
320 		break;
321 
322 	case INO_STATE_PRESENT:
323 		/* It's in-core. GC must iget() it. */
324 		break;
325 
326 	case INO_STATE_UNCHECKED:
327 	case INO_STATE_CHECKING:
328 	case INO_STATE_GC:
329 		/* Should never happen. We should have finished checking
330 		   by the time we actually start doing any GC, and since
331 		   we're holding the alloc_sem, no other garbage collection
332 		   can happen.
333 		*/
334 		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
335 		       ic->ino, ic->state);
336 		up(&c->alloc_sem);
337 		spin_unlock(&c->inocache_lock);
338 		BUG();
339 
340 	case INO_STATE_READING:
341 		/* Someone's currently trying to read it. We must wait for
342 		   them to finish and then go through the full iget() route
343 		   to do the GC. However, sometimes read_inode() needs to get
344 		   the alloc_sem() (for marking nodes invalid) so we must
345 		   drop the alloc_sem before sleeping. */
346 
347 		up(&c->alloc_sem);
348 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
349 			  ic->ino, ic->state));
350 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
351 		/* And because we dropped the alloc_sem we must start again from the
352 		   beginning. Ponder chance of livelock here -- we're returning success
353 		   without actually making any progress.
354 
355 		   Q: What are the chances that the inode is back in INO_STATE_READING
356 		   again by the time we next enter this function? And that this happens
357 		   enough times to cause a real delay?
358 
359 		   A: Small enough that I don't care :)
360 		*/
361 		return 0;
362 	}
363 
364 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
365 	   node intact, and we don't have to muck about with the fragtree etc.
366 	   because we know it's not in-core. If it _was_ in-core, we go through
367 	   all the iget() crap anyway */
368 
369 	if (ic->state == INO_STATE_GC) {
370 		spin_unlock(&c->inocache_lock);
371 
372 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
373 
		/* Put the state back and wake anyone sleeping on inocache_wq,
		   whatever the outcome of the copy. */
374 		spin_lock(&c->inocache_lock);
375 		ic->state = INO_STATE_CHECKEDABSENT;
376 		wake_up(&c->inocache_wq);
377 
		/* -EBADFD means the node could not be copied as-is; any other
		   result (success or error) ends this pass. */
378 		if (ret != -EBADFD) {
379 			spin_unlock(&c->inocache_lock);
380 			goto release_sem;
381 		}
382 
383 		/* Fall through if it wanted us to, with inocache_lock held */
384 	}
385 
386 	/* Prevent the fairly unlikely race where the gcblock is
387 	   entirely obsoleted by the final close of a file which had
388 	   the only valid nodes in the block, followed by erasure,
389 	   followed by freeing of the ic because the erased block(s)
390 	   held _all_ the nodes of that inode.... never been seen but
391 	   it's vaguely possible. */
392 
	/* Copy what is needed out of ic while inocache_lock is still held;
	   ic itself is not touched again after the unlock. */
393 	inum = ic->ino;
394 	nlink = ic->nlink;
395 	spin_unlock(&c->inocache_lock);
396 
397 	f = jffs2_gc_fetch_inode(c, inum, nlink);
398 	if (IS_ERR(f)) {
399 		ret = PTR_ERR(f);
400 		goto release_sem;
401 	}
	/* A NULL (non-error) result is treated as "nothing to do" */
402 	if (!f) {
403 		ret = 0;
404 		goto release_sem;
405 	}
406 
407 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
408 
409 	jffs2_gc_release_inode(c, f);
410 
411  release_sem:
412 	up(&c->alloc_sem);
413 
414  eraseit_lock:
415 	/* If we've finished this block, start it erasing */
416 	spin_lock(&c->erase_completion_lock);
417 
	/* Entered with erase_completion_lock held and alloc_sem released */
418  eraseit:
419 	if (c->gcblock && !c->gcblock->used_size) {
420 		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
421 		/* We're GC'ing an empty block? */
422 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
423 		c->gcblock = NULL;
424 		c->nr_erasing_blocks++;
425 		jffs2_erase_pending_trigger(c);
426 	}
427 	spin_unlock(&c->erase_completion_lock);
428 
429 	return ret;
430 }
431 
/* jffs2_garbage_collect_live
 * Garbage-collect one live node ('raw') that belongs to the in-core
 * inode 'f'.
 *
 * Takes f->sem, then re-checks under erase_completion_lock that 'jeb' is
 * still the GC block and that 'raw' has not been obsoleted meanwhile; if
 * either changed, returns 0 without doing anything.
 *
 * The node is then located in the inode's structures and handed to the
 * matching helper:
 *   - f->metadata            -> jffs2_garbage_collect_metadata()
 *   - a data node in the fragtree: if REF_PRISTINE, an intact copy is tried
 *     first; on -EBADFD (or if not pristine) the covered range [start, end)
 *     goes to jffs2_garbage_collect_hole() when it spans more than one page,
 *     else to jffs2_garbage_collect_dnode()
 *   - an entry in f->dents   -> jffs2_garbage_collect_dirent(), or
 *     jffs2_garbage_collect_deletion_dirent() when fd->ino is zero
 * A node found nowhere is a BUG() unless it has become obsolete.
 *
 * Returns 0 or the error returned by the helper that was called.
 */
432 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
433 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
434 {
435 	struct jffs2_node_frag *frag;
436 	struct jffs2_full_dnode *fn = NULL;
437 	struct jffs2_full_dirent *fd;
438 	uint32_t start = 0, end = 0, nrfrags = 0;
439 	int ret = 0;
440 
441 	down(&f->sem);
442 
443 	/* Now we have the lock for this inode. Check that it's still the one at the head
444 	   of the list. */
445 
446 	spin_lock(&c->erase_completion_lock);
447 
448 	if (c->gcblock != jeb) {
449 		spin_unlock(&c->erase_completion_lock);
450 		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
451 		goto upnout;
452 	}
453 	if (ref_obsolete(raw)) {
454 		spin_unlock(&c->erase_completion_lock);
455 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
456 		/* They'll call again */
457 		goto upnout;
458 	}
459 	spin_unlock(&c->erase_completion_lock);
460 
461 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
462 	if (f->metadata && f->metadata->raw == raw) {
463 		fn = f->metadata;
464 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
465 		goto upnout;
466 	}
467 
468 	/* FIXME. Read node and do lookup? */
	/* Walk the fragtree collecting every frag backed by 'raw': 'start' is
	   the offset of the first one, 'end' the end of the last one seen. */
469 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
470 		if (frag->node && frag->node->raw == raw) {
471 			fn = frag->node;
472 			end = frag->ofs + frag->size;
473 			if (!nrfrags++)
474 				start = frag->ofs;
475 			if (nrfrags == frag->node->frags)
476 				break; /* We've found them all */
477 		}
478 	}
479 	if (fn) {
480 		if (ref_flags(raw) == REF_PRISTINE) {
481 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
482 			if (!ret) {
483 				/* Urgh. Return it sensibly. */
				/* Go through 'fn', not 'frag': 'frag' is NULL
				   if the loop above ran off the end of the
				   fragtree without reaching its break, while
				   'fn' is the same dnode either way. */
484 				fn->raw = f->inocache->nodes;
485 			}
486 			if (ret != -EBADFD)
487 				goto upnout;
488 		}
489 		/* We found a datanode. Do the GC */
490 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
491 			/* It crosses a page boundary. Therefore, it must be a hole. */
492 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
493 		} else {
494 			/* It could still be a hole. But we GC the page this way anyway */
495 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
496 		}
497 		goto upnout;
498 	}
499 
500 	/* Wasn't a dnode. Try dirent */
501 	for (fd = f->dents; fd; fd=fd->next) {
502 		if (fd->raw == raw)
503 			break;
504 	}
505 
506 	if (fd && fd->ino) {
507 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
508 	} else if (fd) {
509 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
510 	} else {
511 		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
512 		       ref_offset(raw), f->inocache->ino);
513 		if (ref_obsolete(raw)) {
514 			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
515 		} else {
516 			jffs2_dbg_dump_node(c, ref_offset(raw));
517 			BUG();
518 		}
519 	}
520  upnout:
521 	up(&f->sem);
522 
523 	return ret;
524 }
525 
/* jffs2_garbage_collect_pristine
 * Move a REF_PRISTINE node to a new location by copying its bytes
 * unchanged.
 *
 * 'ic' is the owning inode cache, or NULL for a node with no owner. With
 * an owner, only a small amount of space is requested up front
 * (sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN, or the node length
 * if smaller); if the space actually granted is shorter than the node the
 * copy is refused.
 *
 * The node is read back and its header CRC checked; inode and dirent nodes
 * also have their node CRC and data/name CRC checked. An unrecognised node
 * type is accepted only when 'ic' is NULL. A failed write is retried once
 * after reserving space again.
 *
 * On success the new copy is registered as REF_PRISTINE against 'ic' and
 * the old ref is marked obsolete.
 *
 * Returns 0 on success; -EBADFD if the node cannot be copied intact (does
 * not fit, CRC mismatch, unknown type), in which case callers that can
 * fall back to rewriting the node; -ENOMEM; -EIO on a short read or a
 * failed write; or the error from reserving space or reading the flash.
 */
526 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
527 					  struct jffs2_inode_cache *ic,
528 					  struct jffs2_raw_node_ref *raw)
529 {
530 	union jffs2_node_union *node;
531 	size_t retlen;
532 	int ret;
533 	uint32_t phys_ofs, alloclen;
534 	uint32_t crc, rawlen;
535 	int retried = 0;
536 
537 	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
538 
539 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
540 
541 	/* Ask for a small amount of space (or the totlen if smaller) because we
542 	   don't want to force wastage of the end of a block if splitting would
543 	   work. */
544 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
545 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
546 
547 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
548 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
549 
550 	if (ret)
551 		return ret;
552 
553 	if (alloclen < rawlen) {
554 		/* Doesn't fit untouched. We'll go the old route and split it */
555 		return -EBADFD;
556 	}
557 
558 	node = kmalloc(rawlen, GFP_KERNEL);
559 	if (!node)
560                return -ENOMEM;
561 
562 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
563 	if (!ret && retlen != rawlen)
564 		ret = -EIO;
565 	if (ret)
566 		goto out_node;
567 
	/* The header CRC covers the common header minus its own 4-byte CRC field */
568 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
569 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
570 		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
571 		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
572 		goto bail;
573 	}
574 
575 	switch(je16_to_cpu(node->u.nodetype)) {
576 	case JFFS2_NODETYPE_INODE:
		/* Node CRC covers the raw inode minus its trailing 8 bytes */
577 		crc = crc32(0, node, sizeof(node->i)-8);
578 		if (je32_to_cpu(node->i.node_crc) != crc) {
579 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
580 			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
581 			goto bail;
582 		}
583 
584 		if (je32_to_cpu(node->i.dsize)) {
585 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
586 			if (je32_to_cpu(node->i.data_crc) != crc) {
587 				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
588 				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
589 				goto bail;
590 			}
591 		}
592 		break;
593 
594 	case JFFS2_NODETYPE_DIRENT:
595 		crc = crc32(0, node, sizeof(node->d)-8);
596 		if (je32_to_cpu(node->d.node_crc) != crc) {
597 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
598 			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
599 			goto bail;
600 		}
601 
602 		if (node->d.nsize) {
603 			crc = crc32(0, node->d.name, node->d.nsize);
604 			if (je32_to_cpu(node->d.name_crc) != crc) {
605 				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
606 				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
607 				goto bail;
608 			}
609 		}
610 		break;
611 	default:
612 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
613 		if (ic) {
614 			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
615 			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
616 			goto bail;
617 		}
618 	}
619 
620 	/* OK, all the CRCs are good; this node can just be copied as-is. */
621  retry:
622 	phys_ofs = write_ofs(c);
623 
624 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
625 
626 	if (ret || (retlen != rawlen)) {
627 		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
628                        rawlen, phys_ofs, ret, retlen);
		/* If anything reached the flash, account the whole target
		   area as an obsolete node. */
629 		if (retlen) {
630 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
631 		} else {
632 			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
633 		}
634 		if (!retried) {
635 			/* Try to reallocate space and retry */
636 			uint32_t dummy;
637 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
638 
639 			retried = 1;
640 
641 			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
642 
643 			jffs2_dbg_acct_sanity_check(c,jeb);
644 			jffs2_dbg_acct_paranoia_check(c, jeb);
645 
646 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
647 						/* this is not the exact summary size of it,
648 							it is only an upper estimation */
649 
650 			if (!ret) {
				/* NOTE(review): phys_ofs printed here is still
				   the offset of the failed write; the new one
				   is only fetched after the jump to 'retry'. */
651 				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
652 
653 				jffs2_dbg_acct_sanity_check(c,jeb);
654 				jffs2_dbg_acct_paranoia_check(c, jeb);
655 
656 				goto retry;
657 			}
658 			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
659 		}
660 
		/* A short write with no error code is reported as -EIO */
661 		if (!ret)
662 			ret = -EIO;
663 		goto out_node;
664 	}
665 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
666 
667 	jffs2_mark_node_obsolete(c, raw);
668 	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
669 
670  out_node:
671 	kfree(node);
672 	return ret;
673  bail:
674 	ret = -EBADFD;
675 	goto out_node;
676 }
677 
/* jffs2_garbage_collect_metadata
 * Write a fresh copy of an inode's metadata node and retire the old one.
 *
 * 'fn' is the inode's current metadata dnode. The payload depends on the
 * file type: block and character devices get the encoded device number,
 * symlinks get the target read back from the old node, and everything
 * else has no payload. The inode size written is taken from the end of
 * the last frag when the fragtree is non-empty, otherwise from
 * JFFS2_F_I_SIZE(f).
 *
 * On success the old node is marked obsolete and freed, and f->metadata
 * points at the new dnode.
 *
 * Returns 0 on success, -ENOMEM if the symlink buffer cannot be allocated,
 * or the error from reading the old node, reserving space or writing.
 */
678 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
679 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
680 {
681 	struct jffs2_full_dnode *new_fn;
682 	struct jffs2_raw_inode ri;
683 	struct jffs2_node_frag *last_frag;
684 	union jffs2_device_node dev;
685 	char *mdata = NULL;
	/* Payload length. This must not be a 'char': it receives fn->size for
	   symlinks, and a target longer than 127 bytes would overflow or turn
	   negative where char is signed, and lengths above 255 would be
	   truncated anywhere, corrupting totlen, csize, dsize, the data CRC
	   and the read/write lengths. */
	int mdatalen = 0;
686 	uint32_t alloclen, ilen;
687 	int ret;
688 
689 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
690 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
691 		/* For these, we don't actually need to read the old node */
692 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
693 		mdata = (char *)&dev;
694 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
695 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
696 		mdatalen = fn->size;
697 		mdata = kmalloc(fn->size, GFP_KERNEL);
698 		if (!mdata) {
699 			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
700 			return -ENOMEM;
701 		}
702 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
703 		if (ret) {
704 			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
705 			kfree(mdata);
706 			return ret;
707 		}
708 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
709 
710 	}
711 
712 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
713 				JFFS2_SUMMARY_INODE_SIZE);
714 	if (ret) {
715 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
716 		       sizeof(ri)+ mdatalen, ret);
717 		goto out;
718 	}
719 
720 	last_frag = frag_last(&f->fragtree);
721 	if (last_frag)
722 		/* Fetch the inode length from the fragtree rather then
723 		 * from i_size since i_size may have not been updated yet */
724 		ilen = last_frag->ofs + last_frag->size;
725 	else
726 		ilen = JFFS2_F_I_SIZE(f);
727 
	/* Build the raw inode. The header CRC is taken once the common header
	   fields are set, the node CRC once every other field is set. */
728 	memset(&ri, 0, sizeof(ri));
729 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
730 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
731 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
732 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
733 
734 	ri.ino = cpu_to_je32(f->inocache->ino);
735 	ri.version = cpu_to_je32(++f->highest_version);
736 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
737 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
738 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
739 	ri.isize = cpu_to_je32(ilen);
740 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
741 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
742 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
743 	ri.offset = cpu_to_je32(0);
744 	ri.csize = cpu_to_je32(mdatalen);
745 	ri.dsize = cpu_to_je32(mdatalen);
746 	ri.compr = JFFS2_COMPR_NONE;
747 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
748 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
749 
750 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
751 
752 	if (IS_ERR(new_fn)) {
753 		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
754 		ret = PTR_ERR(new_fn);
755 		goto out;
756 	}
757 	jffs2_mark_node_obsolete(c, fn->raw);
758 	jffs2_free_full_dnode(fn);
759 	f->metadata = new_fn;
760  out:
	/* Only the symlink path allocated mdata; for devices it points at
	   the on-stack 'dev' and must not be freed. */
761 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
762 		kfree(mdata);
763 	return ret;
764 }
765 
/* jffs2_garbage_collect_dirent
 * Write a new copy of the directory entry 'fd' of directory inode 'f'.
 *
 * A raw dirent is rebuilt from the in-core entry with a new version
 * number. mctime is set from the inode's mtime only when mtime and ctime
 * are equal, otherwise to zero. The new full dirent is then added to
 * f->dents with jffs2_add_fd_to_list().
 *
 * Returns 0 on success, or the error from reserving space or writing.
 */
766 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
767 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
768 {
769 	struct jffs2_full_dirent *new_fd;
770 	struct jffs2_raw_dirent rd;
771 	uint32_t alloclen;
772 	int ret;
773 
	/* Start from an all-zero node, as jffs2_garbage_collect_metadata()
	   does for its raw inode: the node CRC below covers sizeof(rd)-8
	   bytes of 'rd' and the struct is handed on to be written, so any
	   member or padding not assigned here must not carry stack garbage. */
	memset(&rd, 0, sizeof(rd));

774 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
775 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
776 	rd.nsize = strlen(fd->name);
777 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
778 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
779 
780 	rd.pino = cpu_to_je32(f->inocache->ino);
781 	rd.version = cpu_to_je32(++f->highest_version);
782 	rd.ino = cpu_to_je32(fd->ino);
783 	/* If the times on this inode were set by explicit utime() they can be different,
784 	   so refrain from splatting them. */
785 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
786 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
787 	else
788 		rd.mctime = cpu_to_je32(0);
789 	rd.type = fd->type;
	/* Both CRCs are computed last, once every other field is final */
790 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
791 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
792 
793 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
794 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
795 	if (ret) {
796 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
797 		       sizeof(rd)+rd.nsize, ret);
798 		return ret;
799 	}
800 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
801 
802 	if (IS_ERR(new_fd)) {
803 		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
804 		return PTR_ERR(new_fd);
805 	}
806 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
807 	return 0;
808 }
809 
810 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
811 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
812 {
813 	struct jffs2_full_dirent **fdp = &f->dents;
814 	int found = 0;
815 
816 	/* On a medium where we can't actually mark nodes obsolete
817 	   permanently, such as NAND flash, we need to work out
818 	   whether this deletion dirent is still needed to actively
819 	   delete a 'real' dirent with the same name that's still
820 	   somewhere else on the flash. */
821 	if (!jffs2_can_mark_obsolete(c)) {
822 		struct jffs2_raw_dirent *rd;
823 		struct jffs2_raw_node_ref *raw;
824 		int ret;
825 		size_t retlen;
826 		int name_len = strlen(fd->name);
827 		uint32_t name_crc = crc32(0, fd->name, name_len);
828 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
829 
830 		rd = kmalloc(rawlen, GFP_KERNEL);
831 		if (!rd)
832 			return -ENOMEM;
833 
834 		/* Prevent the erase code from nicking the obsolete node refs while
835 		   we're looking at them. I really don't like this extra lock but
836 		   can't see any alternative. Suggestions on a postcard to... */
837 		down(&c->erase_free_sem);
838 
839 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
840 
841 			/* We only care about obsolete ones */
842 			if (!(ref_obsolete(raw)))
843 				continue;
844 
845 			/* Any dirent with the same name is going to have the same length... */
846 			if (ref_totlen(c, NULL, raw) != rawlen)
847 				continue;
848 
849 			/* Doesn't matter if there's one in the same erase block. We're going to
850 			   delete it too at the same time. */
851 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
852 				continue;
853 
854 			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
855 
856 			/* This is an obsolete node belonging to the same directory, and it's of the right
857 			   length. We need to take a closer look...*/
858 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
859 			if (ret) {
860 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
861 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
862 				continue;
863 			}
864 			if (retlen != rawlen) {
865 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
866 				       retlen, rawlen, ref_offset(raw));
867 				continue;
868 			}
869 
870 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
871 				continue;
872 
873 			/* If the name CRC doesn't match, skip */
874 			if (je32_to_cpu(rd->name_crc) != name_crc)
875 				continue;
876 
877 			/* If the name length doesn't match, or it's another deletion dirent, skip */
878 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
879 				continue;
880 
881 			/* OK, check the actual name now */
882 			if (memcmp(rd->name, fd->name, name_len))
883 				continue;
884 
885 			/* OK. The name really does match. There really is still an older node on
886 			   the flash which our deletion dirent obsoletes. So we have to write out
887 			   a new deletion dirent to replace it */
888 			up(&c->erase_free_sem);
889 
890 			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
891 				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
892 			kfree(rd);
893 
894 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
895 		}
896 
897 		up(&c->erase_free_sem);
898 		kfree(rd);
899 	}
900 
901 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
902 	   we should update the metadata node with those times accordingly */
903 
904 	/* No need for it any more. Just mark it obsolete and remove it from the list */
905 	while (*fdp) {
906 		if ((*fdp) == fd) {
907 			found = 1;
908 			*fdp = fd->next;
909 			break;
910 		}
911 		fdp = &(*fdp)->next;
912 	}
913 	if (!found) {
914 		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
915 	}
916 	jffs2_mark_node_obsolete(c, fd->raw);
917 	jffs2_free_full_dirent(fd);
918 	return 0;
919 }
920 
921 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
922 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
923 				      uint32_t start, uint32_t end)
924 {
925 	struct jffs2_raw_inode ri;
926 	struct jffs2_node_frag *frag;
927 	struct jffs2_full_dnode *new_fn;
928 	uint32_t alloclen, ilen;
929 	int ret;
930 
931 	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
932 		  f->inocache->ino, start, end));
933 
934 	memset(&ri, 0, sizeof(ri));
935 
936 	if(fn->frags > 1) {
937 		size_t readlen;
938 		uint32_t crc;
939 		/* It's partially obsoleted by a later write. So we have to
940 		   write it out again with the _same_ version as before */
941 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
942 		if (readlen != sizeof(ri) || ret) {
943 			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
944 			goto fill;
945 		}
946 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
947 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
948 			       ref_offset(fn->raw),
949 			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
950 			return -EIO;
951 		}
952 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
953 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
954 			       ref_offset(fn->raw),
955 			       je32_to_cpu(ri.totlen), sizeof(ri));
956 			return -EIO;
957 		}
958 		crc = crc32(0, &ri, sizeof(ri)-8);
959 		if (crc != je32_to_cpu(ri.node_crc)) {
960 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
961 			       ref_offset(fn->raw),
962 			       je32_to_cpu(ri.node_crc), crc);
963 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
964 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
965 			       start, end, f->inocache->ino);
966 			goto fill;
967 		}
968 		if (ri.compr != JFFS2_COMPR_ZERO) {
969 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
970 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
971 			       start, end, f->inocache->ino);
972 			goto fill;
973 		}
974 	} else {
975 	fill:
976 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
977 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
978 		ri.totlen = cpu_to_je32(sizeof(ri));
979 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
980 
981 		ri.ino = cpu_to_je32(f->inocache->ino);
982 		ri.version = cpu_to_je32(++f->highest_version);
983 		ri.offset = cpu_to_je32(start);
984 		ri.dsize = cpu_to_je32(end - start);
985 		ri.csize = cpu_to_je32(0);
986 		ri.compr = JFFS2_COMPR_ZERO;
987 	}
988 
989 	frag = frag_last(&f->fragtree);
990 	if (frag)
991 		/* Fetch the inode length from the fragtree rather then
992 		 * from i_size since i_size may have not been updated yet */
993 		ilen = frag->ofs + frag->size;
994 	else
995 		ilen = JFFS2_F_I_SIZE(f);
996 
997 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
998 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
999 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1000 	ri.isize = cpu_to_je32(ilen);
1001 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1002 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1003 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1004 	ri.data_crc = cpu_to_je32(0);
1005 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1006 
1007 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1008 				     JFFS2_SUMMARY_INODE_SIZE);
1009 	if (ret) {
1010 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1011 		       sizeof(ri), ret);
1012 		return ret;
1013 	}
1014 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1015 
1016 	if (IS_ERR(new_fn)) {
1017 		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1018 		return PTR_ERR(new_fn);
1019 	}
1020 	if (je32_to_cpu(ri.version) == f->highest_version) {
1021 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1022 		if (f->metadata) {
1023 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1024 			jffs2_free_full_dnode(f->metadata);
1025 			f->metadata = NULL;
1026 		}
1027 		return 0;
1028 	}
1029 
1030 	/*
1031 	 * We should only get here in the case where the node we are
1032 	 * replacing had more than one frag, so we kept the same version
1033 	 * number as before. (Except in case of error -- see 'goto fill;'
1034 	 * above.)
1035 	 */
1036 	D1(if(unlikely(fn->frags <= 1)) {
1037 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1038 		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1039 		       je32_to_cpu(ri.ino));
1040 	});
1041 
1042 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1043 	mark_ref_normal(new_fn->raw);
1044 
1045 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1046 	     frag; frag = frag_next(frag)) {
1047 		if (frag->ofs > fn->size + fn->ofs)
1048 			break;
1049 		if (frag->node == fn) {
1050 			frag->node = new_fn;
1051 			new_fn->frags++;
1052 			fn->frags--;
1053 		}
1054 	}
1055 	if (fn->frags) {
1056 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1057 		BUG();
1058 	}
1059 	if (!new_fn->frags) {
1060 		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1061 		BUG();
1062 	}
1063 
1064 	jffs2_mark_node_obsolete(c, fn->raw);
1065 	jffs2_free_full_dnode(fn);
1066 
1067 	return 0;
1068 }
1069 
1070 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1071 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1072 				       uint32_t start, uint32_t end)
1073 {
1074 	struct jffs2_full_dnode *new_fn;
1075 	struct jffs2_raw_inode ri;
1076 	uint32_t alloclen, offset, orig_end, orig_start;
1077 	int ret = 0;
1078 	unsigned char *comprbuf = NULL, *writebuf;
1079 	unsigned long pg;
1080 	unsigned char *pg_ptr;
1081 
1082 	memset(&ri, 0, sizeof(ri));
1083 
1084 	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1085 		  f->inocache->ino, start, end));
1086 
1087 	orig_end = end;
1088 	orig_start = start;
1089 
1090 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1091 		/* Attempt to do some merging. But only expand to cover logically
1092 		   adjacent frags if the block containing them is already considered
1093 		   to be dirty. Otherwise we end up with GC just going round in
1094 		   circles dirtying the nodes it already wrote out, especially
1095 		   on NAND where we have small eraseblocks and hence a much higher
1096 		   chance of nodes having to be split to cross boundaries. */
1097 
1098 		struct jffs2_node_frag *frag;
1099 		uint32_t min, max;
1100 
1101 		min = start & ~(PAGE_CACHE_SIZE-1);
1102 		max = min + PAGE_CACHE_SIZE;
1103 
1104 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1105 
1106 		/* BUG_ON(!frag) but that'll happen anyway... */
1107 
1108 		BUG_ON(frag->ofs != start);
1109 
1110 		/* First grow down... */
1111 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1112 
1113 			/* If the previous frag doesn't even reach the beginning, there's
1114 			   excessive fragmentation. Just merge. */
1115 			if (frag->ofs > min) {
1116 				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1117 					  frag->ofs, frag->ofs+frag->size));
1118 				start = frag->ofs;
1119 				continue;
1120 			}
1121 			/* OK. This frag holds the first byte of the page. */
1122 			if (!frag->node || !frag->node->raw) {
1123 				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1124 					  frag->ofs, frag->ofs+frag->size));
1125 				break;
1126 			} else {
1127 
1128 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1129 				   in a block which is still considered clean? If so, don't obsolete it.
1130 				   If not, cover it anyway. */
1131 
1132 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1133 				struct jffs2_eraseblock *jeb;
1134 
1135 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1136 
1137 				if (jeb == c->gcblock) {
1138 					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1139 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1140 					start = frag->ofs;
1141 					break;
1142 				}
1143 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1144 					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1145 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1146 					break;
1147 				}
1148 
1149 				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1150 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1151 				start = frag->ofs;
1152 				break;
1153 			}
1154 		}
1155 
1156 		/* ... then up */
1157 
1158 		/* Find last frag which is actually part of the node we're to GC. */
1159 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1160 
1161 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1162 
1163 			/* If the previous frag doesn't even reach the beginning, there's lots
1164 			   of fragmentation. Just merge. */
1165 			if (frag->ofs+frag->size < max) {
1166 				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1167 					  frag->ofs, frag->ofs+frag->size));
1168 				end = frag->ofs + frag->size;
1169 				continue;
1170 			}
1171 
1172 			if (!frag->node || !frag->node->raw) {
1173 				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1174 					  frag->ofs, frag->ofs+frag->size));
1175 				break;
1176 			} else {
1177 
1178 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1179 				   in a block which is still considered clean? If so, don't obsolete it.
1180 				   If not, cover it anyway. */
1181 
1182 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1183 				struct jffs2_eraseblock *jeb;
1184 
1185 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1186 
1187 				if (jeb == c->gcblock) {
1188 					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1189 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1190 					end = frag->ofs + frag->size;
1191 					break;
1192 				}
1193 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1194 					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1195 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1196 					break;
1197 				}
1198 
1199 				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1200 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1201 				end = frag->ofs + frag->size;
1202 				break;
1203 			}
1204 		}
1205 		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1206 			  orig_start, orig_end, start, end));
1207 
1208 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1209 		BUG_ON(end < orig_end);
1210 		BUG_ON(start > orig_start);
1211 	}
1212 
1213 	/* First, use readpage() to read the appropriate page into the page cache */
1214 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1215 	 *    triggered garbage collection in the first place?
1216 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1217 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1218 	 *    suboptimal, but at least we're correct.
1219 	 */
1220 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1221 
1222 	if (IS_ERR(pg_ptr)) {
1223 		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1224 		return PTR_ERR(pg_ptr);
1225 	}
1226 
1227 	offset = start;
1228 	while(offset < orig_end) {
1229 		uint32_t datalen;
1230 		uint32_t cdatalen;
1231 		uint16_t comprtype = JFFS2_COMPR_NONE;
1232 
1233 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1234 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1235 
1236 		if (ret) {
1237 			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1238 			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1239 			break;
1240 		}
1241 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1242 		datalen = end - offset;
1243 
1244 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1245 
1246 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1247 
1248 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1249 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1250 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1251 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1252 
1253 		ri.ino = cpu_to_je32(f->inocache->ino);
1254 		ri.version = cpu_to_je32(++f->highest_version);
1255 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1256 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1257 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1258 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1259 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1260 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1261 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1262 		ri.offset = cpu_to_je32(offset);
1263 		ri.csize = cpu_to_je32(cdatalen);
1264 		ri.dsize = cpu_to_je32(datalen);
1265 		ri.compr = comprtype & 0xff;
1266 		ri.usercompr = (comprtype >> 8) & 0xff;
1267 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1268 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1269 
1270 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1271 
1272 		jffs2_free_comprbuf(comprbuf, writebuf);
1273 
1274 		if (IS_ERR(new_fn)) {
1275 			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1276 			ret = PTR_ERR(new_fn);
1277 			break;
1278 		}
1279 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1280 		offset += datalen;
1281 		if (f->metadata) {
1282 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1283 			jffs2_free_full_dnode(f->metadata);
1284 			f->metadata = NULL;
1285 		}
1286 	}
1287 
1288 	jffs2_gc_release_page(c, pg_ptr, &pg);
1289 	return ret;
1290 }
1291