xref: /linux/fs/jffs2/gc.c (revision 08ec212c0f92cbf30e3ecc7349f18151714041d6)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6  *
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  *
9  * For licensing information, see the file 'LICENCE' in this directory.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/kernel.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/slab.h>
18 #include <linux/pagemap.h>
19 #include <linux/crc32.h>
20 #include <linux/compiler.h>
21 #include <linux/stat.h>
22 #include "nodelist.h"
23 #include "compr.h"
24 
25 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
26 					  struct jffs2_inode_cache *ic,
27 					  struct jffs2_raw_node_ref *raw);
28 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
29 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
30 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
31 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
32 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
33 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
34 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
35 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
36 				      uint32_t start, uint32_t end);
37 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
38 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
39 				       uint32_t start, uint32_t end);
40 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
41 			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
42 
43 /* Called with erase_completion_lock held */
44 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
45 {
46 	struct jffs2_eraseblock *ret;
47 	struct list_head *nextlist = NULL;
48 	int n = jiffies % 128;
49 
50 	/* Pick an eraseblock to garbage collect next. This is where we'll
51 	   put the clever wear-levelling algorithms. Eventually.  */
52 	/* We possibly want to favour the dirtier blocks more when the
53 	   number of free blocks is low. */
54 again:
55 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
56 		jffs2_dbg(1, "Picking block from bad_used_list to GC next\n");
57 		nextlist = &c->bad_used_list;
58 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
59 		/* Note that most of them will have gone directly to be erased.
60 		   So don't favour the erasable_list _too_ much. */
61 		jffs2_dbg(1, "Picking block from erasable_list to GC next\n");
62 		nextlist = &c->erasable_list;
63 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
64 		/* Most of the time, pick one off the very_dirty list */
65 		jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n");
66 		nextlist = &c->very_dirty_list;
67 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
68 		jffs2_dbg(1, "Picking block from dirty_list to GC next\n");
69 		nextlist = &c->dirty_list;
70 	} else if (!list_empty(&c->clean_list)) {
71 		jffs2_dbg(1, "Picking block from clean_list to GC next\n");
72 		nextlist = &c->clean_list;
73 	} else if (!list_empty(&c->dirty_list)) {
74 		jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n");
75 
76 		nextlist = &c->dirty_list;
77 	} else if (!list_empty(&c->very_dirty_list)) {
78 		jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n");
79 		nextlist = &c->very_dirty_list;
80 	} else if (!list_empty(&c->erasable_list)) {
81 		jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n");
82 
83 		nextlist = &c->erasable_list;
84 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
85 		/* There are blocks are wating for the wbuf sync */
86 		jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n");
87 		spin_unlock(&c->erase_completion_lock);
88 		jffs2_flush_wbuf_pad(c);
89 		spin_lock(&c->erase_completion_lock);
90 		goto again;
91 	} else {
92 		/* Eep. All were empty */
93 		jffs2_dbg(1, "No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
94 		return NULL;
95 	}
96 
97 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
98 	list_del(&ret->list);
99 	c->gcblock = ret;
100 	ret->gc_node = ret->first_node;
101 	if (!ret->gc_node) {
102 		pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n",
103 			ret->offset);
104 		BUG();
105 	}
106 
107 	/* Have we accidentally picked a clean block with wasted space ? */
108 	if (ret->wasted_size) {
109 		jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n",
110 			  ret->wasted_size);
111 		ret->dirty_size += ret->wasted_size;
112 		c->wasted_size -= ret->wasted_size;
113 		c->dirty_size += ret->wasted_size;
114 		ret->wasted_size = 0;
115 	}
116 
117 	return ret;
118 }
119 
120 /* jffs2_garbage_collect_pass
121  * Make a single attempt to progress GC. Move one node, and possibly
122  * start erasing one eraseblock.
123  */
124 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
125 {
126 	struct jffs2_inode_info *f;
127 	struct jffs2_inode_cache *ic;
128 	struct jffs2_eraseblock *jeb;
129 	struct jffs2_raw_node_ref *raw;
130 	uint32_t gcblock_dirty;
131 	int ret = 0, inum, nlink;
132 	int xattr = 0;
133 
134 	if (mutex_lock_interruptible(&c->alloc_sem))
135 		return -EINTR;
136 
137 	for (;;) {
138 		spin_lock(&c->erase_completion_lock);
139 		if (!c->unchecked_size)
140 			break;
141 
142 		/* We can't start doing GC yet. We haven't finished checking
143 		   the node CRCs etc. Do it now. */
144 
145 		/* checked_ino is protected by the alloc_sem */
146 		if (c->checked_ino > c->highest_ino && xattr) {
147 			pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
148 				c->unchecked_size);
149 			jffs2_dbg_dump_block_lists_nolock(c);
150 			spin_unlock(&c->erase_completion_lock);
151 			mutex_unlock(&c->alloc_sem);
152 			return -ENOSPC;
153 		}
154 
155 		spin_unlock(&c->erase_completion_lock);
156 
157 		if (!xattr)
158 			xattr = jffs2_verify_xattr(c);
159 
160 		spin_lock(&c->inocache_lock);
161 
162 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
163 
164 		if (!ic) {
165 			spin_unlock(&c->inocache_lock);
166 			continue;
167 		}
168 
169 		if (!ic->pino_nlink) {
170 			jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
171 				  ic->ino);
172 			spin_unlock(&c->inocache_lock);
173 			jffs2_xattr_delete_inode(c, ic);
174 			continue;
175 		}
176 		switch(ic->state) {
177 		case INO_STATE_CHECKEDABSENT:
178 		case INO_STATE_PRESENT:
179 			jffs2_dbg(1, "Skipping ino #%u already checked\n",
180 				  ic->ino);
181 			spin_unlock(&c->inocache_lock);
182 			continue;
183 
184 		case INO_STATE_GC:
185 		case INO_STATE_CHECKING:
186 			pr_warn("Inode #%u is in state %d during CRC check phase!\n",
187 				ic->ino, ic->state);
188 			spin_unlock(&c->inocache_lock);
189 			BUG();
190 
191 		case INO_STATE_READING:
192 			/* We need to wait for it to finish, lest we move on
193 			   and trigger the BUG() above while we haven't yet
194 			   finished checking all its nodes */
195 			jffs2_dbg(1, "Waiting for ino #%u to finish reading\n",
196 				  ic->ino);
197 			/* We need to come back again for the _same_ inode. We've
198 			 made no progress in this case, but that should be OK */
199 			c->checked_ino--;
200 
201 			mutex_unlock(&c->alloc_sem);
202 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
203 			return 0;
204 
205 		default:
206 			BUG();
207 
208 		case INO_STATE_UNCHECKED:
209 			;
210 		}
211 		ic->state = INO_STATE_CHECKING;
212 		spin_unlock(&c->inocache_lock);
213 
214 		jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n",
215 			  __func__, ic->ino);
216 
217 		ret = jffs2_do_crccheck_inode(c, ic);
218 		if (ret)
219 			pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n",
220 				ic->ino);
221 
222 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
223 		mutex_unlock(&c->alloc_sem);
224 		return ret;
225 	}
226 
227 	/* If there are any blocks which need erasing, erase them now */
228 	if (!list_empty(&c->erase_complete_list) ||
229 	    !list_empty(&c->erase_pending_list)) {
230 		spin_unlock(&c->erase_completion_lock);
231 		mutex_unlock(&c->alloc_sem);
232 		jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__);
233 		if (jffs2_erase_pending_blocks(c, 1))
234 			return 0;
235 
236 		jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
237 		mutex_lock(&c->alloc_sem);
238 		spin_lock(&c->erase_completion_lock);
239 	}
240 
241 	/* First, work out which block we're garbage-collecting */
242 	jeb = c->gcblock;
243 
244 	if (!jeb)
245 		jeb = jffs2_find_gc_block(c);
246 
247 	if (!jeb) {
248 		/* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
249 		if (c->nr_erasing_blocks) {
250 			spin_unlock(&c->erase_completion_lock);
251 			mutex_unlock(&c->alloc_sem);
252 			return -EAGAIN;
253 		}
254 		jffs2_dbg(1, "Couldn't find erase block to garbage collect!\n");
255 		spin_unlock(&c->erase_completion_lock);
256 		mutex_unlock(&c->alloc_sem);
257 		return -EIO;
258 	}
259 
260 	jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n",
261 		  jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size);
262 	D1(if (c->nextblock)
263 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
264 
265 	if (!jeb->used_size) {
266 		mutex_unlock(&c->alloc_sem);
267 		goto eraseit;
268 	}
269 
270 	raw = jeb->gc_node;
271 	gcblock_dirty = jeb->dirty_size;
272 
273 	while(ref_obsolete(raw)) {
274 		jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n",
275 			  ref_offset(raw));
276 		raw = ref_next(raw);
277 		if (unlikely(!raw)) {
278 			pr_warn("eep. End of raw list while still supposedly nodes to GC\n");
279 			pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
280 				jeb->offset, jeb->free_size,
281 				jeb->dirty_size, jeb->used_size);
282 			jeb->gc_node = raw;
283 			spin_unlock(&c->erase_completion_lock);
284 			mutex_unlock(&c->alloc_sem);
285 			BUG();
286 		}
287 	}
288 	jeb->gc_node = raw;
289 
290 	jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n",
291 		  ref_offset(raw));
292 
293 	if (!raw->next_in_ino) {
294 		/* Inode-less node. Clean marker, snapshot or something like that */
295 		spin_unlock(&c->erase_completion_lock);
296 		if (ref_flags(raw) == REF_PRISTINE) {
297 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
298 			jffs2_garbage_collect_pristine(c, NULL, raw);
299 		} else {
300 			/* Just mark it obsolete */
301 			jffs2_mark_node_obsolete(c, raw);
302 		}
303 		mutex_unlock(&c->alloc_sem);
304 		goto eraseit_lock;
305 	}
306 
307 	ic = jffs2_raw_ref_to_ic(raw);
308 
309 #ifdef CONFIG_JFFS2_FS_XATTR
310 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
311 	 * We can decide whether this node is inode or xattr by ic->class.     */
312 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
313 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
314 		spin_unlock(&c->erase_completion_lock);
315 
316 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
317 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
318 		} else {
319 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
320 		}
321 		goto test_gcnode;
322 	}
323 #endif
324 
325 	/* We need to hold the inocache. Either the erase_completion_lock or
326 	   the inocache_lock are sufficient; we trade down since the inocache_lock
327 	   causes less contention. */
328 	spin_lock(&c->inocache_lock);
329 
330 	spin_unlock(&c->erase_completion_lock);
331 
332 	jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n",
333 		  __func__, jeb->offset, ref_offset(raw), ref_flags(raw),
334 		  ic->ino);
335 
336 	/* Three possibilities:
337 	   1. Inode is already in-core. We must iget it and do proper
338 	      updating to its fragtree, etc.
339 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
340 	      inocache to prevent a read_inode(), copy the node intact.
341 	   3. Inode is not in-core, node is not pristine. We must iget()
342 	      and take the slow path.
343 	*/
344 
345 	switch(ic->state) {
346 	case INO_STATE_CHECKEDABSENT:
347 		/* It's been checked, but it's not currently in-core.
348 		   We can just copy any pristine nodes, but have
349 		   to prevent anyone else from doing read_inode() while
350 		   we're at it, so we set the state accordingly */
351 		if (ref_flags(raw) == REF_PRISTINE)
352 			ic->state = INO_STATE_GC;
353 		else {
354 			jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
355 				  ic->ino);
356 		}
357 		break;
358 
359 	case INO_STATE_PRESENT:
360 		/* It's in-core. GC must iget() it. */
361 		break;
362 
363 	case INO_STATE_UNCHECKED:
364 	case INO_STATE_CHECKING:
365 	case INO_STATE_GC:
366 		/* Should never happen. We should have finished checking
367 		   by the time we actually start doing any GC, and since
368 		   we're holding the alloc_sem, no other garbage collection
369 		   can happen.
370 		*/
371 		pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
372 			ic->ino, ic->state);
373 		mutex_unlock(&c->alloc_sem);
374 		spin_unlock(&c->inocache_lock);
375 		BUG();
376 
377 	case INO_STATE_READING:
378 		/* Someone's currently trying to read it. We must wait for
379 		   them to finish and then go through the full iget() route
380 		   to do the GC. However, sometimes read_inode() needs to get
381 		   the alloc_sem() (for marking nodes invalid) so we must
382 		   drop the alloc_sem before sleeping. */
383 
384 		mutex_unlock(&c->alloc_sem);
385 		jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n",
386 			  __func__, ic->ino, ic->state);
387 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
388 		/* And because we dropped the alloc_sem we must start again from the
389 		   beginning. Ponder chance of livelock here -- we're returning success
390 		   without actually making any progress.
391 
392 		   Q: What are the chances that the inode is back in INO_STATE_READING
393 		   again by the time we next enter this function? And that this happens
394 		   enough times to cause a real delay?
395 
396 		   A: Small enough that I don't care :)
397 		*/
398 		return 0;
399 	}
400 
401 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
402 	   node intact, and we don't have to muck about with the fragtree etc.
403 	   because we know it's not in-core. If it _was_ in-core, we go through
404 	   all the iget() crap anyway */
405 
406 	if (ic->state == INO_STATE_GC) {
407 		spin_unlock(&c->inocache_lock);
408 
409 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
410 
411 		spin_lock(&c->inocache_lock);
412 		ic->state = INO_STATE_CHECKEDABSENT;
413 		wake_up(&c->inocache_wq);
414 
415 		if (ret != -EBADFD) {
416 			spin_unlock(&c->inocache_lock);
417 			goto test_gcnode;
418 		}
419 
420 		/* Fall through if it wanted us to, with inocache_lock held */
421 	}
422 
423 	/* Prevent the fairly unlikely race where the gcblock is
424 	   entirely obsoleted by the final close of a file which had
425 	   the only valid nodes in the block, followed by erasure,
426 	   followed by freeing of the ic because the erased block(s)
427 	   held _all_ the nodes of that inode.... never been seen but
428 	   it's vaguely possible. */
429 
430 	inum = ic->ino;
431 	nlink = ic->pino_nlink;
432 	spin_unlock(&c->inocache_lock);
433 
434 	f = jffs2_gc_fetch_inode(c, inum, !nlink);
435 	if (IS_ERR(f)) {
436 		ret = PTR_ERR(f);
437 		goto release_sem;
438 	}
439 	if (!f) {
440 		ret = 0;
441 		goto release_sem;
442 	}
443 
444 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
445 
446 	jffs2_gc_release_inode(c, f);
447 
448  test_gcnode:
449 	if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
450 		/* Eep. This really should never happen. GC is broken */
451 		pr_err("Error garbage collecting node at %08x!\n",
452 		       ref_offset(jeb->gc_node));
453 		ret = -ENOSPC;
454 	}
455  release_sem:
456 	mutex_unlock(&c->alloc_sem);
457 
458  eraseit_lock:
459 	/* If we've finished this block, start it erasing */
460 	spin_lock(&c->erase_completion_lock);
461 
462  eraseit:
463 	if (c->gcblock && !c->gcblock->used_size) {
464 		jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n",
465 			  c->gcblock->offset);
466 		/* We're GC'ing an empty block? */
467 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
468 		c->gcblock = NULL;
469 		c->nr_erasing_blocks++;
470 		jffs2_garbage_collect_trigger(c);
471 	}
472 	spin_unlock(&c->erase_completion_lock);
473 
474 	return ret;
475 }
476 
477 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
478 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
479 {
480 	struct jffs2_node_frag *frag;
481 	struct jffs2_full_dnode *fn = NULL;
482 	struct jffs2_full_dirent *fd;
483 	uint32_t start = 0, end = 0, nrfrags = 0;
484 	int ret = 0;
485 
486 	mutex_lock(&f->sem);
487 
488 	/* Now we have the lock for this inode. Check that it's still the one at the head
489 	   of the list. */
490 
491 	spin_lock(&c->erase_completion_lock);
492 
493 	if (c->gcblock != jeb) {
494 		spin_unlock(&c->erase_completion_lock);
495 		jffs2_dbg(1, "GC block is no longer gcblock. Restart\n");
496 		goto upnout;
497 	}
498 	if (ref_obsolete(raw)) {
499 		spin_unlock(&c->erase_completion_lock);
500 		jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n");
501 		/* They'll call again */
502 		goto upnout;
503 	}
504 	spin_unlock(&c->erase_completion_lock);
505 
506 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
507 	if (f->metadata && f->metadata->raw == raw) {
508 		fn = f->metadata;
509 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
510 		goto upnout;
511 	}
512 
513 	/* FIXME. Read node and do lookup? */
514 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
515 		if (frag->node && frag->node->raw == raw) {
516 			fn = frag->node;
517 			end = frag->ofs + frag->size;
518 			if (!nrfrags++)
519 				start = frag->ofs;
520 			if (nrfrags == frag->node->frags)
521 				break; /* We've found them all */
522 		}
523 	}
524 	if (fn) {
525 		if (ref_flags(raw) == REF_PRISTINE) {
526 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
527 			if (!ret) {
528 				/* Urgh. Return it sensibly. */
529 				frag->node->raw = f->inocache->nodes;
530 			}
531 			if (ret != -EBADFD)
532 				goto upnout;
533 		}
534 		/* We found a datanode. Do the GC */
535 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
536 			/* It crosses a page boundary. Therefore, it must be a hole. */
537 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
538 		} else {
539 			/* It could still be a hole. But we GC the page this way anyway */
540 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
541 		}
542 		goto upnout;
543 	}
544 
545 	/* Wasn't a dnode. Try dirent */
546 	for (fd = f->dents; fd; fd=fd->next) {
547 		if (fd->raw == raw)
548 			break;
549 	}
550 
551 	if (fd && fd->ino) {
552 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
553 	} else if (fd) {
554 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
555 	} else {
556 		pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n",
557 			ref_offset(raw), f->inocache->ino);
558 		if (ref_obsolete(raw)) {
559 			pr_warn("But it's obsolete so we don't mind too much\n");
560 		} else {
561 			jffs2_dbg_dump_node(c, ref_offset(raw));
562 			BUG();
563 		}
564 	}
565  upnout:
566 	mutex_unlock(&f->sem);
567 
568 	return ret;
569 }
570 
571 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
572 					  struct jffs2_inode_cache *ic,
573 					  struct jffs2_raw_node_ref *raw)
574 {
575 	union jffs2_node_union *node;
576 	size_t retlen;
577 	int ret;
578 	uint32_t phys_ofs, alloclen;
579 	uint32_t crc, rawlen;
580 	int retried = 0;
581 
582 	jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n",
583 		  ref_offset(raw));
584 
585 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
586 
587 	/* Ask for a small amount of space (or the totlen if smaller) because we
588 	   don't want to force wastage of the end of a block if splitting would
589 	   work. */
590 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
591 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
592 
593 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
594 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
595 
596 	if (ret)
597 		return ret;
598 
599 	if (alloclen < rawlen) {
600 		/* Doesn't fit untouched. We'll go the old route and split it */
601 		return -EBADFD;
602 	}
603 
604 	node = kmalloc(rawlen, GFP_KERNEL);
605 	if (!node)
606 		return -ENOMEM;
607 
608 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
609 	if (!ret && retlen != rawlen)
610 		ret = -EIO;
611 	if (ret)
612 		goto out_node;
613 
614 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
615 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
616 		pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
617 			ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
618 		goto bail;
619 	}
620 
621 	switch(je16_to_cpu(node->u.nodetype)) {
622 	case JFFS2_NODETYPE_INODE:
623 		crc = crc32(0, node, sizeof(node->i)-8);
624 		if (je32_to_cpu(node->i.node_crc) != crc) {
625 			pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
626 				ref_offset(raw), je32_to_cpu(node->i.node_crc),
627 				crc);
628 			goto bail;
629 		}
630 
631 		if (je32_to_cpu(node->i.dsize)) {
632 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
633 			if (je32_to_cpu(node->i.data_crc) != crc) {
634 				pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
635 					ref_offset(raw),
636 					je32_to_cpu(node->i.data_crc), crc);
637 				goto bail;
638 			}
639 		}
640 		break;
641 
642 	case JFFS2_NODETYPE_DIRENT:
643 		crc = crc32(0, node, sizeof(node->d)-8);
644 		if (je32_to_cpu(node->d.node_crc) != crc) {
645 			pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
646 				ref_offset(raw),
647 				je32_to_cpu(node->d.node_crc), crc);
648 			goto bail;
649 		}
650 
651 		if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
652 			pr_warn("Name in dirent node at 0x%08x contains zeroes\n",
653 				ref_offset(raw));
654 			goto bail;
655 		}
656 
657 		if (node->d.nsize) {
658 			crc = crc32(0, node->d.name, node->d.nsize);
659 			if (je32_to_cpu(node->d.name_crc) != crc) {
660 				pr_warn("Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
661 					ref_offset(raw),
662 					je32_to_cpu(node->d.name_crc), crc);
663 				goto bail;
664 			}
665 		}
666 		break;
667 	default:
668 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
669 		if (ic) {
670 			pr_warn("Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
671 				ref_offset(raw), je16_to_cpu(node->u.nodetype));
672 			goto bail;
673 		}
674 	}
675 
676 	/* OK, all the CRCs are good; this node can just be copied as-is. */
677  retry:
678 	phys_ofs = write_ofs(c);
679 
680 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
681 
682 	if (ret || (retlen != rawlen)) {
683 		pr_notice("Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
684 			  rawlen, phys_ofs, ret, retlen);
685 		if (retlen) {
686 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
687 		} else {
688 			pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
689 				  phys_ofs);
690 		}
691 		if (!retried) {
692 			/* Try to reallocate space and retry */
693 			uint32_t dummy;
694 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
695 
696 			retried = 1;
697 
698 			jffs2_dbg(1, "Retrying failed write of REF_PRISTINE node.\n");
699 
700 			jffs2_dbg_acct_sanity_check(c,jeb);
701 			jffs2_dbg_acct_paranoia_check(c, jeb);
702 
703 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
704 						/* this is not the exact summary size of it,
705 							it is only an upper estimation */
706 
707 			if (!ret) {
708 				jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
709 					  phys_ofs);
710 
711 				jffs2_dbg_acct_sanity_check(c,jeb);
712 				jffs2_dbg_acct_paranoia_check(c, jeb);
713 
714 				goto retry;
715 			}
716 			jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
717 				  ret);
718 		}
719 
720 		if (!ret)
721 			ret = -EIO;
722 		goto out_node;
723 	}
724 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
725 
726 	jffs2_mark_node_obsolete(c, raw);
727 	jffs2_dbg(1, "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n",
728 		  ref_offset(raw));
729 
730  out_node:
731 	kfree(node);
732 	return ret;
733  bail:
734 	ret = -EBADFD;
735 	goto out_node;
736 }
737 
738 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
739 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
740 {
741 	struct jffs2_full_dnode *new_fn;
742 	struct jffs2_raw_inode ri;
743 	struct jffs2_node_frag *last_frag;
744 	union jffs2_device_node dev;
745 	char *mdata = NULL;
746 	int mdatalen = 0;
747 	uint32_t alloclen, ilen;
748 	int ret;
749 
750 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
751 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
752 		/* For these, we don't actually need to read the old node */
753 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
754 		mdata = (char *)&dev;
755 		jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
756 			  __func__, mdatalen);
757 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
758 		mdatalen = fn->size;
759 		mdata = kmalloc(fn->size, GFP_KERNEL);
760 		if (!mdata) {
761 			pr_warn("kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
762 			return -ENOMEM;
763 		}
764 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
765 		if (ret) {
766 			pr_warn("read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n",
767 				ret);
768 			kfree(mdata);
769 			return ret;
770 		}
771 		jffs2_dbg(1, "%s(): Writing %d bites of symlink target\n",
772 			  __func__, mdatalen);
773 
774 	}
775 
776 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
777 				JFFS2_SUMMARY_INODE_SIZE);
778 	if (ret) {
779 		pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
780 			sizeof(ri) + mdatalen, ret);
781 		goto out;
782 	}
783 
784 	last_frag = frag_last(&f->fragtree);
785 	if (last_frag)
786 		/* Fetch the inode length from the fragtree rather then
787 		 * from i_size since i_size may have not been updated yet */
788 		ilen = last_frag->ofs + last_frag->size;
789 	else
790 		ilen = JFFS2_F_I_SIZE(f);
791 
792 	memset(&ri, 0, sizeof(ri));
793 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
794 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
795 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
796 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
797 
798 	ri.ino = cpu_to_je32(f->inocache->ino);
799 	ri.version = cpu_to_je32(++f->highest_version);
800 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
801 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
802 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
803 	ri.isize = cpu_to_je32(ilen);
804 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
805 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
806 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
807 	ri.offset = cpu_to_je32(0);
808 	ri.csize = cpu_to_je32(mdatalen);
809 	ri.dsize = cpu_to_je32(mdatalen);
810 	ri.compr = JFFS2_COMPR_NONE;
811 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
812 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
813 
814 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
815 
816 	if (IS_ERR(new_fn)) {
817 		pr_warn("Error writing new dnode: %ld\n", PTR_ERR(new_fn));
818 		ret = PTR_ERR(new_fn);
819 		goto out;
820 	}
821 	jffs2_mark_node_obsolete(c, fn->raw);
822 	jffs2_free_full_dnode(fn);
823 	f->metadata = new_fn;
824  out:
825 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
826 		kfree(mdata);
827 	return ret;
828 }
829 
830 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
831 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
832 {
833 	struct jffs2_full_dirent *new_fd;
834 	struct jffs2_raw_dirent rd;
835 	uint32_t alloclen;
836 	int ret;
837 
838 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
839 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
840 	rd.nsize = strlen(fd->name);
841 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
842 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
843 
844 	rd.pino = cpu_to_je32(f->inocache->ino);
845 	rd.version = cpu_to_je32(++f->highest_version);
846 	rd.ino = cpu_to_je32(fd->ino);
847 	/* If the times on this inode were set by explicit utime() they can be different,
848 	   so refrain from splatting them. */
849 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
850 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
851 	else
852 		rd.mctime = cpu_to_je32(0);
853 	rd.type = fd->type;
854 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
855 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
856 
857 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
858 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
859 	if (ret) {
860 		pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
861 			sizeof(rd)+rd.nsize, ret);
862 		return ret;
863 	}
864 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
865 
866 	if (IS_ERR(new_fd)) {
867 		pr_warn("jffs2_write_dirent in garbage_collect_dirent failed: %ld\n",
868 			PTR_ERR(new_fd));
869 		return PTR_ERR(new_fd);
870 	}
871 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
872 	return 0;
873 }
874 
875 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
876 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
877 {
878 	struct jffs2_full_dirent **fdp = &f->dents;
879 	int found = 0;
880 
881 	/* On a medium where we can't actually mark nodes obsolete
882 	   pernamently, such as NAND flash, we need to work out
883 	   whether this deletion dirent is still needed to actively
884 	   delete a 'real' dirent with the same name that's still
885 	   somewhere else on the flash. */
886 	if (!jffs2_can_mark_obsolete(c)) {
887 		struct jffs2_raw_dirent *rd;
888 		struct jffs2_raw_node_ref *raw;
889 		int ret;
890 		size_t retlen;
891 		int name_len = strlen(fd->name);
892 		uint32_t name_crc = crc32(0, fd->name, name_len);
893 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
894 
895 		rd = kmalloc(rawlen, GFP_KERNEL);
896 		if (!rd)
897 			return -ENOMEM;
898 
899 		/* Prevent the erase code from nicking the obsolete node refs while
900 		   we're looking at them. I really don't like this extra lock but
901 		   can't see any alternative. Suggestions on a postcard to... */
902 		mutex_lock(&c->erase_free_sem);
903 
904 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
905 
906 			cond_resched();
907 
908 			/* We only care about obsolete ones */
909 			if (!(ref_obsolete(raw)))
910 				continue;
911 
912 			/* Any dirent with the same name is going to have the same length... */
913 			if (ref_totlen(c, NULL, raw) != rawlen)
914 				continue;
915 
916 			/* Doesn't matter if there's one in the same erase block. We're going to
917 			   delete it too at the same time. */
918 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
919 				continue;
920 
921 			jffs2_dbg(1, "Check potential deletion dirent at %08x\n",
922 				  ref_offset(raw));
923 
924 			/* This is an obsolete node belonging to the same directory, and it's of the right
925 			   length. We need to take a closer look...*/
926 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
927 			if (ret) {
928 				pr_warn("%s(): Read error (%d) reading obsolete node at %08x\n",
929 					__func__, ret, ref_offset(raw));
930 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
931 				continue;
932 			}
933 			if (retlen != rawlen) {
934 				pr_warn("%s(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
935 					__func__, retlen, rawlen,
936 					ref_offset(raw));
937 				continue;
938 			}
939 
940 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
941 				continue;
942 
943 			/* If the name CRC doesn't match, skip */
944 			if (je32_to_cpu(rd->name_crc) != name_crc)
945 				continue;
946 
947 			/* If the name length doesn't match, or it's another deletion dirent, skip */
948 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
949 				continue;
950 
951 			/* OK, check the actual name now */
952 			if (memcmp(rd->name, fd->name, name_len))
953 				continue;
954 
955 			/* OK. The name really does match. There really is still an older node on
956 			   the flash which our deletion dirent obsoletes. So we have to write out
957 			   a new deletion dirent to replace it */
958 			mutex_unlock(&c->erase_free_sem);
959 
960 			jffs2_dbg(1, "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
961 				  ref_offset(fd->raw), fd->name,
962 				  ref_offset(raw), je32_to_cpu(rd->ino));
963 			kfree(rd);
964 
965 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
966 		}
967 
968 		mutex_unlock(&c->erase_free_sem);
969 		kfree(rd);
970 	}
971 
972 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
973 	   we should update the metadata node with those times accordingly */
974 
975 	/* No need for it any more. Just mark it obsolete and remove it from the list */
976 	while (*fdp) {
977 		if ((*fdp) == fd) {
978 			found = 1;
979 			*fdp = fd->next;
980 			break;
981 		}
982 		fdp = &(*fdp)->next;
983 	}
984 	if (!found) {
985 		pr_warn("Deletion dirent \"%s\" not found in list for ino #%u\n",
986 			fd->name, f->inocache->ino);
987 	}
988 	jffs2_mark_node_obsolete(c, fd->raw);
989 	jffs2_free_full_dirent(fd);
990 	return 0;
991 }
992 
993 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
994 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
995 				      uint32_t start, uint32_t end)
996 {
997 	struct jffs2_raw_inode ri;
998 	struct jffs2_node_frag *frag;
999 	struct jffs2_full_dnode *new_fn;
1000 	uint32_t alloclen, ilen;
1001 	int ret;
1002 
1003 	jffs2_dbg(1, "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
1004 		  f->inocache->ino, start, end);
1005 
1006 	memset(&ri, 0, sizeof(ri));
1007 
1008 	if(fn->frags > 1) {
1009 		size_t readlen;
1010 		uint32_t crc;
1011 		/* It's partially obsoleted by a later write. So we have to
1012 		   write it out again with the _same_ version as before */
1013 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
1014 		if (readlen != sizeof(ri) || ret) {
1015 			pr_warn("Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n",
1016 				ret, readlen);
1017 			goto fill;
1018 		}
1019 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
1020 			pr_warn("%s(): Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
1021 				__func__, ref_offset(fn->raw),
1022 				je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
1023 			return -EIO;
1024 		}
1025 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
1026 			pr_warn("%s(): Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
1027 				__func__, ref_offset(fn->raw),
1028 				je32_to_cpu(ri.totlen), sizeof(ri));
1029 			return -EIO;
1030 		}
1031 		crc = crc32(0, &ri, sizeof(ri)-8);
1032 		if (crc != je32_to_cpu(ri.node_crc)) {
1033 			pr_warn("%s: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
1034 				__func__, ref_offset(fn->raw),
1035 				je32_to_cpu(ri.node_crc), crc);
1036 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
1037 			pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1038 				start, end, f->inocache->ino);
1039 			goto fill;
1040 		}
1041 		if (ri.compr != JFFS2_COMPR_ZERO) {
1042 			pr_warn("%s(): Node 0x%08x wasn't a hole node!\n",
1043 				__func__, ref_offset(fn->raw));
1044 			pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1045 				start, end, f->inocache->ino);
1046 			goto fill;
1047 		}
1048 	} else {
1049 	fill:
1050 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1051 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1052 		ri.totlen = cpu_to_je32(sizeof(ri));
1053 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1054 
1055 		ri.ino = cpu_to_je32(f->inocache->ino);
1056 		ri.version = cpu_to_je32(++f->highest_version);
1057 		ri.offset = cpu_to_je32(start);
1058 		ri.dsize = cpu_to_je32(end - start);
1059 		ri.csize = cpu_to_je32(0);
1060 		ri.compr = JFFS2_COMPR_ZERO;
1061 	}
1062 
1063 	frag = frag_last(&f->fragtree);
1064 	if (frag)
1065 		/* Fetch the inode length from the fragtree rather then
1066 		 * from i_size since i_size may have not been updated yet */
1067 		ilen = frag->ofs + frag->size;
1068 	else
1069 		ilen = JFFS2_F_I_SIZE(f);
1070 
1071 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1072 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1073 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1074 	ri.isize = cpu_to_je32(ilen);
1075 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1076 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1077 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1078 	ri.data_crc = cpu_to_je32(0);
1079 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1080 
1081 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1082 				     JFFS2_SUMMARY_INODE_SIZE);
1083 	if (ret) {
1084 		pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1085 			sizeof(ri), ret);
1086 		return ret;
1087 	}
1088 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1089 
1090 	if (IS_ERR(new_fn)) {
1091 		pr_warn("Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1092 		return PTR_ERR(new_fn);
1093 	}
1094 	if (je32_to_cpu(ri.version) == f->highest_version) {
1095 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1096 		if (f->metadata) {
1097 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1098 			jffs2_free_full_dnode(f->metadata);
1099 			f->metadata = NULL;
1100 		}
1101 		return 0;
1102 	}
1103 
1104 	/*
1105 	 * We should only get here in the case where the node we are
1106 	 * replacing had more than one frag, so we kept the same version
1107 	 * number as before. (Except in case of error -- see 'goto fill;'
1108 	 * above.)
1109 	 */
1110 	D1(if(unlikely(fn->frags <= 1)) {
1111 			pr_warn("%s(): Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1112 				__func__, fn->frags, je32_to_cpu(ri.version),
1113 				f->highest_version, je32_to_cpu(ri.ino));
1114 	});
1115 
1116 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1117 	mark_ref_normal(new_fn->raw);
1118 
1119 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1120 	     frag; frag = frag_next(frag)) {
1121 		if (frag->ofs > fn->size + fn->ofs)
1122 			break;
1123 		if (frag->node == fn) {
1124 			frag->node = new_fn;
1125 			new_fn->frags++;
1126 			fn->frags--;
1127 		}
1128 	}
1129 	if (fn->frags) {
1130 		pr_warn("%s(): Old node still has frags!\n", __func__);
1131 		BUG();
1132 	}
1133 	if (!new_fn->frags) {
1134 		pr_warn("%s(): New node has no frags!\n", __func__);
1135 		BUG();
1136 	}
1137 
1138 	jffs2_mark_node_obsolete(c, fn->raw);
1139 	jffs2_free_full_dnode(fn);
1140 
1141 	return 0;
1142 }
1143 
1144 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1145 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1146 				       uint32_t start, uint32_t end)
1147 {
1148 	struct jffs2_full_dnode *new_fn;
1149 	struct jffs2_raw_inode ri;
1150 	uint32_t alloclen, offset, orig_end, orig_start;
1151 	int ret = 0;
1152 	unsigned char *comprbuf = NULL, *writebuf;
1153 	unsigned long pg;
1154 	unsigned char *pg_ptr;
1155 
1156 	memset(&ri, 0, sizeof(ri));
1157 
1158 	jffs2_dbg(1, "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1159 		  f->inocache->ino, start, end);
1160 
1161 	orig_end = end;
1162 	orig_start = start;
1163 
1164 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1165 		/* Attempt to do some merging. But only expand to cover logically
1166 		   adjacent frags if the block containing them is already considered
1167 		   to be dirty. Otherwise we end up with GC just going round in
1168 		   circles dirtying the nodes it already wrote out, especially
1169 		   on NAND where we have small eraseblocks and hence a much higher
1170 		   chance of nodes having to be split to cross boundaries. */
1171 
1172 		struct jffs2_node_frag *frag;
1173 		uint32_t min, max;
1174 
1175 		min = start & ~(PAGE_CACHE_SIZE-1);
1176 		max = min + PAGE_CACHE_SIZE;
1177 
1178 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1179 
1180 		/* BUG_ON(!frag) but that'll happen anyway... */
1181 
1182 		BUG_ON(frag->ofs != start);
1183 
1184 		/* First grow down... */
1185 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1186 
1187 			/* If the previous frag doesn't even reach the beginning, there's
1188 			   excessive fragmentation. Just merge. */
1189 			if (frag->ofs > min) {
1190 				jffs2_dbg(1, "Expanding down to cover partial frag (0x%x-0x%x)\n",
1191 					  frag->ofs, frag->ofs+frag->size);
1192 				start = frag->ofs;
1193 				continue;
1194 			}
1195 			/* OK. This frag holds the first byte of the page. */
1196 			if (!frag->node || !frag->node->raw) {
1197 				jffs2_dbg(1, "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1198 					  frag->ofs, frag->ofs+frag->size);
1199 				break;
1200 			} else {
1201 
1202 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1203 				   in a block which is still considered clean? If so, don't obsolete it.
1204 				   If not, cover it anyway. */
1205 
1206 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1207 				struct jffs2_eraseblock *jeb;
1208 
1209 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1210 
1211 				if (jeb == c->gcblock) {
1212 					jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1213 						  frag->ofs,
1214 						  frag->ofs + frag->size,
1215 						  ref_offset(raw));
1216 					start = frag->ofs;
1217 					break;
1218 				}
1219 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1220 					jffs2_dbg(1, "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1221 						  frag->ofs,
1222 						  frag->ofs + frag->size,
1223 						  jeb->offset);
1224 					break;
1225 				}
1226 
1227 				jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1228 					  frag->ofs,
1229 					  frag->ofs + frag->size,
1230 					  jeb->offset);
1231 				start = frag->ofs;
1232 				break;
1233 			}
1234 		}
1235 
1236 		/* ... then up */
1237 
1238 		/* Find last frag which is actually part of the node we're to GC. */
1239 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1240 
1241 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1242 
1243 			/* If the previous frag doesn't even reach the beginning, there's lots
1244 			   of fragmentation. Just merge. */
1245 			if (frag->ofs+frag->size < max) {
1246 				jffs2_dbg(1, "Expanding up to cover partial frag (0x%x-0x%x)\n",
1247 					  frag->ofs, frag->ofs+frag->size);
1248 				end = frag->ofs + frag->size;
1249 				continue;
1250 			}
1251 
1252 			if (!frag->node || !frag->node->raw) {
1253 				jffs2_dbg(1, "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1254 					  frag->ofs, frag->ofs+frag->size);
1255 				break;
1256 			} else {
1257 
1258 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1259 				   in a block which is still considered clean? If so, don't obsolete it.
1260 				   If not, cover it anyway. */
1261 
1262 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1263 				struct jffs2_eraseblock *jeb;
1264 
1265 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1266 
1267 				if (jeb == c->gcblock) {
1268 					jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1269 						  frag->ofs,
1270 						  frag->ofs + frag->size,
1271 						  ref_offset(raw));
1272 					end = frag->ofs + frag->size;
1273 					break;
1274 				}
1275 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1276 					jffs2_dbg(1, "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1277 						  frag->ofs,
1278 						  frag->ofs + frag->size,
1279 						  jeb->offset);
1280 					break;
1281 				}
1282 
1283 				jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1284 					  frag->ofs,
1285 					  frag->ofs + frag->size,
1286 					  jeb->offset);
1287 				end = frag->ofs + frag->size;
1288 				break;
1289 			}
1290 		}
1291 		jffs2_dbg(1, "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1292 			  orig_start, orig_end, start, end);
1293 
1294 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1295 		BUG_ON(end < orig_end);
1296 		BUG_ON(start > orig_start);
1297 	}
1298 
1299 	/* First, use readpage() to read the appropriate page into the page cache */
1300 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1301 	 *    triggered garbage collection in the first place?
1302 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1303 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1304 	 *    suboptimal, but at least we're correct.
1305 	 */
1306 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1307 
1308 	if (IS_ERR(pg_ptr)) {
1309 		pr_warn("read_cache_page() returned error: %ld\n",
1310 			PTR_ERR(pg_ptr));
1311 		return PTR_ERR(pg_ptr);
1312 	}
1313 
1314 	offset = start;
1315 	while(offset < orig_end) {
1316 		uint32_t datalen;
1317 		uint32_t cdatalen;
1318 		uint16_t comprtype = JFFS2_COMPR_NONE;
1319 
1320 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1321 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1322 
1323 		if (ret) {
1324 			pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1325 				sizeof(ri) + JFFS2_MIN_DATA_LEN, ret);
1326 			break;
1327 		}
1328 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1329 		datalen = end - offset;
1330 
1331 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1332 
1333 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1334 
1335 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1336 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1337 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1338 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1339 
1340 		ri.ino = cpu_to_je32(f->inocache->ino);
1341 		ri.version = cpu_to_je32(++f->highest_version);
1342 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1343 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1344 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1345 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1346 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1347 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1348 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1349 		ri.offset = cpu_to_je32(offset);
1350 		ri.csize = cpu_to_je32(cdatalen);
1351 		ri.dsize = cpu_to_je32(datalen);
1352 		ri.compr = comprtype & 0xff;
1353 		ri.usercompr = (comprtype >> 8) & 0xff;
1354 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1355 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1356 
1357 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1358 
1359 		jffs2_free_comprbuf(comprbuf, writebuf);
1360 
1361 		if (IS_ERR(new_fn)) {
1362 			pr_warn("Error writing new dnode: %ld\n",
1363 				PTR_ERR(new_fn));
1364 			ret = PTR_ERR(new_fn);
1365 			break;
1366 		}
1367 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1368 		offset += datalen;
1369 		if (f->metadata) {
1370 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1371 			jffs2_free_full_dnode(f->metadata);
1372 			f->metadata = NULL;
1373 		}
1374 	}
1375 
1376 	jffs2_gc_release_page(c, pg_ptr, &pg);
1377 	return ret;
1378 }
1379