xref: /linux/fs/ocfs2/namei.c (revision f24e9f586b377749dff37554696cf3a105540c94)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * namei.c
5  *
6  * Create and rename file, directory, symlinks
7  *
8  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9  *
10  *  Portions of this code from linux/fs/ext3/dir.c
11  *
12  *  Copyright (C) 1992, 1993, 1994, 1995
13  *  Remy Card (card@masi.ibp.fr)
14  *  Laboratoire MASI - Institut Blaise Pascal
15  *  Universite Pierre et Marie Curie (Paris VI)
16  *
17  *   from
18  *
19  *   linux/fs/minix/dir.c
20  *
21  *   Copyright (C) 1991, 1992 Linus Torvalds
22  *
23  * This program is free software; you can redistribute it and/or
24  * modify it under the terms of the GNU General Public
25  * License as published by the Free Software Foundation; either
26  * version 2 of the License, or (at your option) any later version.
27  *
28  * This program is distributed in the hope that it will be useful,
29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
31  * General Public License for more details.
32  *
33  * You should have received a copy of the GNU General Public
34  * License along with this program; if not, write to the
35  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
36  * Boston, MA 02111-1307, USA.
37  */
38 
39 #include <linux/fs.h>
40 #include <linux/types.h>
41 #include <linux/slab.h>
42 #include <linux/highmem.h>
43 
44 #define MLOG_MASK_PREFIX ML_NAMEI
45 #include <cluster/masklog.h>
46 
47 #include "ocfs2.h"
48 
49 #include "alloc.h"
50 #include "dcache.h"
51 #include "dir.h"
52 #include "dlmglue.h"
53 #include "extent_map.h"
54 #include "file.h"
55 #include "inode.h"
56 #include "journal.h"
57 #include "namei.h"
58 #include "suballoc.h"
59 #include "super.h"
60 #include "symlink.h"
61 #include "sysfile.h"
62 #include "uptodate.h"
63 #include "vote.h"
64 
65 #include "buffer_head_io.h"
66 
/*
 * Sizing constants for a two-level (chunk, block) table:
 * NAMEI_RA_SIZE is CHUNKS * BLOCKS (2 * 4 = 8 slots) and
 * NAMEI_RA_INDEX(c, b) flattens a (chunk, block) pair into a slot index.
 * NOTE(review): presumably read-ahead bookkeeping for directory searches;
 * the users are not in this part of the file -- confirm.
 */
67 #define NAMEI_RA_CHUNKS  2
68 #define NAMEI_RA_BLOCKS  4
69 #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
70 #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
71 
/*
 * Forward declarations for static helpers that are called in this file
 * before the point where they are defined.
 */
/* NOTE(review): "static int inline" is legal but the conventional
 * specifier order is "static inline int". */
72 static int inline ocfs2_search_dirblock(struct buffer_head *bh,
73 					struct inode *dir,
74 					const char *name, int namelen,
75 					unsigned long offset,
76 					struct ocfs2_dir_entry **res_dir);
77 
/* Removes a directory entry; ocfs2_unlink() uses it to drop the name
 * from the parent directory. */
78 static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
79 			      struct inode *dir,
80 			      struct ocfs2_dir_entry *de_del,
81 			      struct buffer_head *bh);
82 
/* Worker behind the ocfs2_add_entry() inline wrapper. */
83 static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
84 			     struct inode *dir,
85 			     const char *name, int namelen,
86 			     struct inode *inode, u64 blkno,
87 			     struct buffer_head *parent_fe_bh,
88 			     struct buffer_head *insert_bh);
89 
/* Called by ocfs2_mknod() once locks, reservations and the transaction
 * are in place. */
90 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
91 			      struct inode *dir,
92 			      struct dentry *dentry, int mode,
93 			      dev_t dev,
94 			      struct buffer_head **new_fe_bh,
95 			      struct buffer_head *parent_fe_bh,
96 			      struct ocfs2_journal_handle *handle,
97 			      struct inode **ret_inode,
98 			      struct ocfs2_alloc_context *inode_ac);
99 
/* Writes the "." and ".." entries of a newly created directory. */
100 static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
101 			      struct ocfs2_journal_handle *handle,
102 			      struct inode *parent,
103 			      struct inode *inode,
104 			      struct buffer_head *fe_bh,
105 			      struct ocfs2_alloc_context *data_ac);
106 
/* Locks two inodes in block-number order; used by ocfs2_rename(). */
107 static int ocfs2_double_lock(struct ocfs2_super *osb,
108 			     struct ocfs2_journal_handle *handle,
109 			     struct buffer_head **bh1,
110 			     struct inode *inode1,
111 			     struct buffer_head **bh2,
112 			     struct inode *inode2);
113 
/* The two orphan helpers are used by ocfs2_unlink() when an inode's link
 * count drops to zero: the first runs before the transaction is started,
 * the second inside it. */
114 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
115 				    struct ocfs2_journal_handle *handle,
116 				    struct inode *inode,
117 				    char *name,
118 				    struct buffer_head **de_bh);
119 
120 static int ocfs2_orphan_add(struct ocfs2_super *osb,
121 			    struct ocfs2_journal_handle *handle,
122 			    struct inode *inode,
123 			    struct ocfs2_dinode *fe,
124 			    char *name,
125 			    struct buffer_head *de_bh);
126 
127 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
128 				     struct ocfs2_journal_handle *handle,
129 				     struct inode *inode,
130 				     const char *symname);
131 
132 static inline int ocfs2_add_entry(struct ocfs2_journal_handle *handle,
133 				  struct dentry *dentry,
134 				  struct inode *inode, u64 blkno,
135 				  struct buffer_head *parent_fe_bh,
136 				  struct buffer_head *insert_bh)
137 {
138 	return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
139 				 dentry->d_name.name, dentry->d_name.len,
140 				 inode, blkno, parent_fe_bh, insert_bh);
141 }
142 
143 /* An orphan dir name is an 8 byte value, printed as a hex string:
 * two hex digits per byte of a u64, i.e. 16 characters.  The terminating
 * NUL is not counted, which is why name buffers in this file are declared
 * with OCFS2_ORPHAN_NAMELEN + 1 bytes. */
144 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
145 
146 static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
147 				   struct nameidata *nd)
148 {
149 	int status;
150 	u64 blkno;
151 	struct buffer_head *dirent_bh = NULL;
152 	struct inode *inode = NULL;
153 	struct dentry *ret;
154 	struct ocfs2_dir_entry *dirent;
155 	struct ocfs2_inode_info *oi;
156 
157 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
158 		   dentry->d_name.len, dentry->d_name.name);
159 
160 	if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
161 		ret = ERR_PTR(-ENAMETOOLONG);
162 		goto bail;
163 	}
164 
165 	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
166 	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
167 
168 	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
169 	if (status < 0) {
170 		if (status != -ENOENT)
171 			mlog_errno(status);
172 		ret = ERR_PTR(status);
173 		goto bail;
174 	}
175 
176 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
177 					  dentry->d_name.len, &blkno,
178 					  dir, &dirent_bh, &dirent);
179 	if (status < 0)
180 		goto bail_add;
181 
182 	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
183 	if (IS_ERR(inode)) {
184 		mlog(ML_ERROR, "Unable to create inode %llu\n",
185 		     (unsigned long long)blkno);
186 		ret = ERR_PTR(-EACCES);
187 		goto bail_unlock;
188 	}
189 
190 	oi = OCFS2_I(inode);
191 	/* Clear any orphaned state... If we were able to look up the
192 	 * inode from a directory, it certainly can't be orphaned. We
193 	 * might have the bad state from a node which intended to
194 	 * orphan this inode but crashed before it could commit the
195 	 * unlink. */
196 	spin_lock(&oi->ip_lock);
197 	oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
198 	oi->ip_orphaned_slot = OCFS2_INVALID_SLOT;
199 	spin_unlock(&oi->ip_lock);
200 
201 bail_add:
202 
203 	dentry->d_op = &ocfs2_dentry_ops;
204 	ret = d_splice_alias(inode, dentry);
205 
206 bail_unlock:
207 	/* Don't drop the cluster lock until *after* the d_add --
208 	 * unlink on another node will message us to remove that
209 	 * dentry under this lock so otherwise we can race this with
210 	 * the vote thread and have a stale dentry. */
211 	ocfs2_meta_unlock(dir, 0);
212 
213 bail:
214 	if (dirent_bh)
215 		brelse(dirent_bh);
216 
217 	mlog_exit_ptr(ret);
218 
219 	return ret;
220 }
221 
222 static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
223 			      struct ocfs2_journal_handle *handle,
224 			      struct inode *parent,
225 			      struct inode *inode,
226 			      struct buffer_head *fe_bh,
227 			      struct ocfs2_alloc_context *data_ac)
228 {
229 	int status;
230 	struct buffer_head *new_bh = NULL;
231 	struct ocfs2_dir_entry *de = NULL;
232 
233 	mlog_entry_void();
234 
235 	status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
236 				     data_ac, NULL, &new_bh);
237 	if (status < 0) {
238 		mlog_errno(status);
239 		goto bail;
240 	}
241 
242 	ocfs2_set_new_buffer_uptodate(inode, new_bh);
243 
244 	status = ocfs2_journal_access(handle, inode, new_bh,
245 				      OCFS2_JOURNAL_ACCESS_CREATE);
246 	if (status < 0) {
247 		mlog_errno(status);
248 		goto bail;
249 	}
250 	memset(new_bh->b_data, 0, osb->sb->s_blocksize);
251 
252 	de = (struct ocfs2_dir_entry *) new_bh->b_data;
253 	de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
254 	de->name_len = 1;
255 	de->rec_len =
256 		cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
257 	strcpy(de->name, ".");
258 	ocfs2_set_de_type(de, S_IFDIR);
259 	de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
260 	de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
261 	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
262 				  OCFS2_DIR_REC_LEN(1));
263 	de->name_len = 2;
264 	strcpy(de->name, "..");
265 	ocfs2_set_de_type(de, S_IFDIR);
266 
267 	status = ocfs2_journal_dirty(handle, new_bh);
268 	if (status < 0) {
269 		mlog_errno(status);
270 		goto bail;
271 	}
272 
273 	i_size_write(inode, inode->i_sb->s_blocksize);
274 	inode->i_nlink = 2;
275 	inode->i_blocks = ocfs2_align_bytes_to_sectors(inode->i_sb->s_blocksize);
276 	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
277 	if (status < 0) {
278 		mlog_errno(status);
279 		goto bail;
280 	}
281 
282 	status = 0;
283 bail:
284 	if (new_bh)
285 		brelse(new_bh);
286 
287 	mlog_exit(status);
288 	return status;
289 }
290 
/*
 * ocfs2_mknod() - create a new inode of the type encoded in @mode and link
 * it into @dir under @dentry's name.  ocfs2_mkdir() and ocfs2_create()
 * both funnel into this function with a @dev of 0.
 *
 * Sequence, as visible below:
 *   1. allocate a journal handle and take the metadata lock on @dir
 *      (final argument 1), which also yields the parent dinode buffer;
 *   2. refuse with -EMLINK if a directory is being made and @dir already
 *      has OCFS2_LINK_MAX links, and with -ENOENT if the parent's on-disk
 *      link count is zero;
 *   3. ocfs2_check_dir_for_entry() and ocfs2_prepare_dir_for_insert() for
 *      the new name;
 *   4. reserve an inode and, for directories, one cluster;
 *   5. start the transaction and let ocfs2_mknod_locked() build the inode;
 *   6. for directories, write "." / ".." and bump the parent's link count;
 *   7. add the directory entry, hash the inode and instantiate the dentry.
 *
 * All exits go through "leave", which commits the transaction (if a
 * handle is still held), drops every buffer reference, puts the inode on
 * failure and frees both allocation contexts.
 *
 * Returns 0 on success or a negative errno.
 */
291 static int ocfs2_mknod(struct inode *dir,
292 		       struct dentry *dentry,
293 		       int mode,
294 		       dev_t dev)
295 {
296 	int status = 0;
297 	struct buffer_head *parent_fe_bh = NULL;
298 	struct ocfs2_journal_handle *handle = NULL;
299 	struct ocfs2_super *osb;
300 	struct ocfs2_dinode *dirfe;
301 	struct buffer_head *new_fe_bh = NULL;
302 	struct buffer_head *de_bh = NULL;
303 	struct inode *inode = NULL;
304 	struct ocfs2_alloc_context *inode_ac = NULL;
305 	struct ocfs2_alloc_context *data_ac = NULL;
306 
307 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
308 		   (unsigned long)dev, dentry->d_name.len,
309 		   dentry->d_name.name);
310 
311 	/* get our super block */
312 	osb = OCFS2_SB(dir->i_sb);
313 
314 	handle = ocfs2_alloc_handle(osb);
315 	if (handle == NULL) {
316 		status = -ENOMEM;
317 		mlog_errno(status);
318 		goto leave;
319 	}
320 
321 	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
322 	if (status < 0) {
323 		if (status != -ENOENT)
324 			mlog_errno(status);
325 		goto leave;
326 	}
327 
	/* a new subdirectory adds a link ("..") to the parent */
328 	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
329 		status = -EMLINK;
330 		goto leave;
331 	}
332 
333 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
334 	if (!dirfe->i_links_count) {
335 		/* can't make a file in a deleted directory. */
336 		status = -ENOENT;
337 		goto leave;
338 	}
339 
	/* any non-zero result aborts the create; presumably this is how an
	 * already existing name is rejected -- confirm in the helper */
340 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
341 					   dentry->d_name.len);
342 	if (status)
343 		goto leave;
344 
345 	/* get a spot inside the dir. */
346 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
347 					      dentry->d_name.name,
348 					      dentry->d_name.len, &de_bh);
349 	if (status < 0) {
350 		mlog_errno(status);
351 		goto leave;
352 	}
353 
354 	/* reserve an inode spot */
355 	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
356 	if (status < 0) {
357 		if (status != -ENOSPC)
358 			mlog_errno(status);
359 		goto leave;
360 	}
361 
362 	/* are we making a directory? If so, reserve a cluster for his
363 	 * 1st extent. */
364 	if (S_ISDIR(mode)) {
365 		status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
366 		if (status < 0) {
367 			if (status != -ENOSPC)
368 				mlog_errno(status);
369 			goto leave;
370 		}
371 	}
372 
	/* on failure the handle is cleared so "leave" does not try to
	 * commit it */
373 	handle = ocfs2_start_trans(osb, handle, OCFS2_MKNOD_CREDITS);
374 	if (IS_ERR(handle)) {
375 		status = PTR_ERR(handle);
376 		handle = NULL;
377 		mlog_errno(status);
378 		goto leave;
379 	}
380 
381 	/* do the real work now. */
382 	status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
383 				    &new_fe_bh, parent_fe_bh, handle,
384 				    &inode, inode_ac);
385 	if (status < 0) {
386 		mlog_errno(status);
387 		goto leave;
388 	}
389 
390 	if (S_ISDIR(mode)) {
391 		status = ocfs2_fill_new_dir(osb, handle, dir, inode,
392 					    new_fe_bh, data_ac);
393 		if (status < 0) {
394 			mlog_errno(status);
395 			goto leave;
396 		}
397 
		/* account for the child's ".." in the parent, on disk
		 * first and then in the in-core inode */
398 		status = ocfs2_journal_access(handle, dir, parent_fe_bh,
399 					      OCFS2_JOURNAL_ACCESS_WRITE);
400 		if (status < 0) {
401 			mlog_errno(status);
402 			goto leave;
403 		}
404 		le16_add_cpu(&dirfe->i_links_count, 1);
405 		status = ocfs2_journal_dirty(handle, parent_fe_bh);
406 		if (status < 0) {
407 			mlog_errno(status);
408 			goto leave;
409 		}
410 		dir->i_nlink++;
411 	}
412 
	/* NOTE(review): if this fails for a directory, the parent link
	 * count bumped just above is not rolled back here. */
413 	status = ocfs2_add_entry(handle, dentry, inode,
414 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
415 				 de_bh);
416 	if (status < 0) {
417 		mlog_errno(status);
418 		goto leave;
419 	}
420 
421 	insert_inode_hash(inode);
422 	dentry->d_op = &ocfs2_dentry_ops;
423 	d_instantiate(dentry, inode);
424 	status = 0;
425 leave:
426 	if (handle)
427 		ocfs2_commit_trans(handle);
428 
429 	if (status == -ENOSPC)
430 		mlog(0, "Disk is full\n");
431 
432 	if (new_fe_bh)
433 		brelse(new_fe_bh);
434 
435 	if (de_bh)
436 		brelse(de_bh);
437 
438 	if (parent_fe_bh)
439 		brelse(parent_fe_bh);
440 
	/* on success the reference now belongs to the dentry */
441 	if ((status < 0) && inode)
442 		iput(inode);
443 
444 	if (inode_ac)
445 		ocfs2_free_alloc_context(inode_ac);
446 
447 	if (data_ac)
448 		ocfs2_free_alloc_context(data_ac);
449 
450 	mlog_exit(status);
451 
452 	return status;
453 }
454 
455 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
456 			      struct inode *dir,
457 			      struct dentry *dentry, int mode,
458 			      dev_t dev,
459 			      struct buffer_head **new_fe_bh,
460 			      struct buffer_head *parent_fe_bh,
461 			      struct ocfs2_journal_handle *handle,
462 			      struct inode **ret_inode,
463 			      struct ocfs2_alloc_context *inode_ac)
464 {
465 	int status = 0;
466 	struct ocfs2_dinode *fe = NULL;
467 	struct ocfs2_extent_list *fel;
468 	u64 fe_blkno = 0;
469 	u16 suballoc_bit;
470 	struct inode *inode = NULL;
471 
472 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
473 		   (unsigned long)dev, dentry->d_name.len,
474 		   dentry->d_name.name);
475 
476 	*new_fe_bh = NULL;
477 	*ret_inode = NULL;
478 
479 	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
480 				       &fe_blkno);
481 	if (status < 0) {
482 		mlog_errno(status);
483 		goto leave;
484 	}
485 
486 	inode = new_inode(dir->i_sb);
487 	if (IS_ERR(inode)) {
488 		status = PTR_ERR(inode);
489 		mlog(ML_ERROR, "new_inode failed!\n");
490 		goto leave;
491 	}
492 
493 	/* populate as many fields early on as possible - many of
494 	 * these are used by the support functions here and in
495 	 * callers. */
496 	inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
497 	OCFS2_I(inode)->ip_blkno = fe_blkno;
498 	if (S_ISDIR(mode))
499 		inode->i_nlink = 2;
500 	else
501 		inode->i_nlink = 1;
502 	inode->i_mode = mode;
503 	spin_lock(&osb->osb_lock);
504 	inode->i_generation = osb->s_next_generation++;
505 	spin_unlock(&osb->osb_lock);
506 
507 	*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
508 	if (!*new_fe_bh) {
509 		status = -EIO;
510 		mlog_errno(status);
511 		goto leave;
512 	}
513 	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
514 
515 	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
516 				      OCFS2_JOURNAL_ACCESS_CREATE);
517 	if (status < 0) {
518 		mlog_errno(status);
519 		goto leave;
520 	}
521 
522 	fe = (struct ocfs2_dinode *) (*new_fe_bh)->b_data;
523 	memset(fe, 0, osb->sb->s_blocksize);
524 
525 	fe->i_generation = cpu_to_le32(inode->i_generation);
526 	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
527 	fe->i_blkno = cpu_to_le64(fe_blkno);
528 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
529 	fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
530 	fe->i_uid = cpu_to_le32(current->fsuid);
531 	if (dir->i_mode & S_ISGID) {
532 		fe->i_gid = cpu_to_le32(dir->i_gid);
533 		if (S_ISDIR(mode))
534 			mode |= S_ISGID;
535 	} else
536 		fe->i_gid = cpu_to_le32(current->fsgid);
537 	fe->i_mode = cpu_to_le16(mode);
538 	if (S_ISCHR(mode) || S_ISBLK(mode))
539 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
540 
541 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
542 
543 	fe->i_last_eb_blk = 0;
544 	strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
545 	le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL);
546 	fe->i_atime = fe->i_ctime = fe->i_mtime =
547 		cpu_to_le64(CURRENT_TIME.tv_sec);
548 	fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
549 		cpu_to_le32(CURRENT_TIME.tv_nsec);
550 	fe->i_dtime = 0;
551 
552 	fel = &fe->id2.i_list;
553 	fel->l_tree_depth = 0;
554 	fel->l_next_free_rec = 0;
555 	fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
556 
557 	status = ocfs2_journal_dirty(handle, *new_fe_bh);
558 	if (status < 0) {
559 		mlog_errno(status);
560 		goto leave;
561 	}
562 
563 	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
564 		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
565 		     "i_blkno=%llu, i_ino=%lu\n",
566 		     (unsigned long long) (*new_fe_bh)->b_blocknr,
567 		     (unsigned long long)fe->i_blkno, inode->i_ino);
568 		BUG();
569 	}
570 
571 	ocfs2_inode_set_new(osb, inode);
572 	status = ocfs2_create_new_inode_locks(inode);
573 	if (status < 0)
574 		mlog_errno(status);
575 
576 	status = 0; /* error in ocfs2_create_new_inode_locks is not
577 		     * critical */
578 
579 	*ret_inode = inode;
580 leave:
581 	if (status < 0) {
582 		if (*new_fe_bh) {
583 			brelse(*new_fe_bh);
584 			*new_fe_bh = NULL;
585 		}
586 		if (inode)
587 			iput(inode);
588 	}
589 
590 	mlog_exit(status);
591 	return status;
592 }
593 
594 static int ocfs2_mkdir(struct inode *dir,
595 		       struct dentry *dentry,
596 		       int mode)
597 {
598 	int ret;
599 
600 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
601 		   dentry->d_name.len, dentry->d_name.name);
602 	ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
603 	mlog_exit(ret);
604 
605 	return ret;
606 }
607 
608 static int ocfs2_create(struct inode *dir,
609 			struct dentry *dentry,
610 			int mode,
611 			struct nameidata *nd)
612 {
613 	int ret;
614 
615 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
616 		   dentry->d_name.len, dentry->d_name.name);
617 	ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
618 	mlog_exit(ret);
619 
620 	return ret;
621 }
622 
/*
 * ocfs2_link() - create a hard link: a new name @dentry in @dir for the
 * inode behind @old_dentry.
 *
 * Directories are refused with -EPERM.  The parent directory is locked
 * first and the target inode second, both through ocfs2_meta_lock() on
 * the same journal handle.  After the checks (-ENOENT for a parent with
 * no links, ocfs2_check_dir_for_entry(), -EMLINK when the on-disk link
 * count has reached OCFS2_LINK_MAX) a transaction is started, the link
 * count and ctime are updated in both the VFS inode and the dinode, and
 * the directory entry is added.
 *
 * If journalling the dinode or adding the entry fails, the link count is
 * decremented again in both places.  On success an extra inode reference
 * is taken for the new dentry before d_instantiate().
 *
 * Returns 0 on success or a negative errno.
 */
623 static int ocfs2_link(struct dentry *old_dentry,
624 		      struct inode *dir,
625 		      struct dentry *dentry)
626 {
627 	struct ocfs2_journal_handle *handle = NULL;
628 	struct inode *inode = old_dentry->d_inode;
629 	int err;
630 	struct buffer_head *fe_bh = NULL;
631 	struct buffer_head *parent_fe_bh = NULL;
632 	struct buffer_head *de_bh = NULL;
633 	struct ocfs2_dinode *fe = NULL;
634 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
635 
636 	mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
637 		   old_dentry->d_name.len, old_dentry->d_name.name,
638 		   dentry->d_name.len, dentry->d_name.name);
639 
640 	if (S_ISDIR(inode->i_mode)) {
641 		err = -EPERM;
642 		goto bail;
643 	}
644 
645 	handle = ocfs2_alloc_handle(osb);
646 	if (handle == NULL) {
647 		err = -ENOMEM;
648 		goto bail;
649 	}
650 
651 	err = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
652 	if (err < 0) {
653 		if (err != -ENOENT)
654 			mlog_errno(err);
655 		goto bail;
656 	}
657 
	/* no new names in a directory that has already been removed */
658 	if (!dir->i_nlink) {
659 		err = -ENOENT;
660 		goto bail;
661 	}
662 
663 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
664 					dentry->d_name.len);
665 	if (err)
666 		goto bail;
667 
668 	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
669 					   dentry->d_name.name,
670 					   dentry->d_name.len, &de_bh);
671 	if (err < 0) {
672 		mlog_errno(err);
673 		goto bail;
674 	}
675 
676 	err = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
677 	if (err < 0) {
678 		if (err != -ENOENT)
679 			mlog_errno(err);
680 		goto bail;
681 	}
682 
683 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
684 	if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
685 		err = -EMLINK;
686 		goto bail;
687 	}
688 
	/* on failure the handle is cleared so "bail" does not commit it */
689 	handle = ocfs2_start_trans(osb, handle, OCFS2_LINK_CREDITS);
690 	if (IS_ERR(handle)) {
691 		err = PTR_ERR(handle);
692 		handle = NULL;
693 		mlog_errno(err);
694 		goto bail;
695 	}
696 
697 	err = ocfs2_journal_access(handle, inode, fe_bh,
698 				   OCFS2_JOURNAL_ACCESS_WRITE);
699 	if (err < 0) {
700 		mlog_errno(err);
701 		goto bail;
702 	}
703 
704 	inode->i_nlink++;
705 	inode->i_ctime = CURRENT_TIME;
706 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
707 	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
708 	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
709 
	/* NOTE(review): both rollbacks below restore the link counts but
	 * leave the updated i_ctime in place. */
710 	err = ocfs2_journal_dirty(handle, fe_bh);
711 	if (err < 0) {
712 		le16_add_cpu(&fe->i_links_count, -1);
713 		inode->i_nlink--;
714 		mlog_errno(err);
715 		goto bail;
716 	}
717 
718 	err = ocfs2_add_entry(handle, dentry, inode,
719 			      OCFS2_I(inode)->ip_blkno,
720 			      parent_fe_bh, de_bh);
721 	if (err) {
722 		le16_add_cpu(&fe->i_links_count, -1);
723 		inode->i_nlink--;
724 		mlog_errno(err);
725 		goto bail;
726 	}
727 
	/* the new dentry holds its own reference on the inode */
728 	atomic_inc(&inode->i_count);
729 	dentry->d_op = &ocfs2_dentry_ops;
730 	d_instantiate(dentry, inode);
731 bail:
732 	if (handle)
733 		ocfs2_commit_trans(handle);
734 	if (de_bh)
735 		brelse(de_bh);
736 	if (fe_bh)
737 		brelse(fe_bh);
738 	if (parent_fe_bh)
739 		brelse(parent_fe_bh);
740 
741 	mlog_exit(err);
742 
743 	return err;
744 }
745 
/*
 * ocfs2_unlink() - remove the name @dentry from directory @dir.
 *
 * The root inode is refused with -EPERM.  The parent directory is locked
 * first and the victim inode second (ocfs2_meta_lock() on one handle).
 * The on-disk directory entry must still point at the inode the dentry
 * refers to, otherwise -ENOENT is returned.  A directory victim must be
 * empty and have exactly two links, otherwise -ENOTEMPTY.
 *
 * The in-core link count is lowered before the transaction starts (to 0
 * for directories, by one otherwise) and the old value is kept in
 * saved_nlink so it can be restored if anything fails before the new
 * count has been written to the dinode.  When the count reaches zero the
 * inode is added to the orphan directory inside the same transaction
 * that deletes the name.
 *
 * Returns 0 on success or a negative errno.
 */
746 static int ocfs2_unlink(struct inode *dir,
747 			struct dentry *dentry)
748 {
749 	int status;
750 	unsigned int saved_nlink = 0;
751 	struct inode *inode = dentry->d_inode;
752 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
753 	u64 blkno;
754 	struct ocfs2_dinode *fe = NULL;
755 	struct buffer_head *fe_bh = NULL;
756 	struct buffer_head *parent_node_bh = NULL;
757 	struct ocfs2_journal_handle *handle = NULL;
758 	struct ocfs2_dir_entry *dirent = NULL;
759 	struct buffer_head *dirent_bh = NULL;
760 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
761 	struct buffer_head *orphan_entry_bh = NULL;
762 
763 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
764 		   dentry->d_name.len, dentry->d_name.name);
765 
766 	BUG_ON(dentry->d_parent->d_inode != dir);
767 
768 	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
769 
770 	if (inode == osb->root_inode) {
771 		mlog(0, "Cannot delete the root directory\n");
772 		status = -EPERM;
773 		goto leave;
774 	}
775 
776 	handle = ocfs2_alloc_handle(osb);
777 	if (handle == NULL) {
778 		status = -ENOMEM;
779 		mlog_errno(status);
780 		goto leave;
781 	}
782 
783 	status = ocfs2_meta_lock(dir, handle, &parent_node_bh, 1);
784 	if (status < 0) {
785 		if (status != -ENOENT)
786 			mlog_errno(status);
787 		goto leave;
788 	}
789 
790 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
791 					  dentry->d_name.len, &blkno,
792 					  dir, &dirent_bh, &dirent);
793 	if (status < 0) {
794 		if (status != -ENOENT)
795 			mlog_errno(status);
796 		goto leave;
797 	}
798 
	/* the name on disk must still refer to the inode we were handed */
799 	if (OCFS2_I(inode)->ip_blkno != blkno) {
800 		status = -ENOENT;
801 
802 		mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n",
803 		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
804 		     (unsigned long long)blkno, OCFS2_I(inode)->ip_flags);
805 		goto leave;
806 	}
807 
808 	status = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
809 	if (status < 0) {
810 		if (status != -ENOENT)
811 			mlog_errno(status);
812 		goto leave;
813 	}
814 
	/* a directory may only go when it is empty and has exactly the
	 * two links it was created with */
815 	if (S_ISDIR(inode->i_mode)) {
816 	       	if (!ocfs2_empty_dir(inode)) {
817 			status = -ENOTEMPTY;
818 			goto leave;
819 		} else if (inode->i_nlink != 2) {
820 			status = -ENOTEMPTY;
821 			goto leave;
822 		}
823 	}
824 
825 	/* There are still a few steps left until we can consider the
826 	 * unlink to have succeeded. Save off nlink here before
827 	 * modification so we can set it back in case we hit an issue
828 	 * before commit. */
829 	saved_nlink = inode->i_nlink;
830 	if (S_ISDIR(inode->i_mode))
831 		inode->i_nlink = 0;
832 	else
833 		inode->i_nlink--;
834 
	/* the vote is given the already lowered link count */
835 	status = ocfs2_request_unlink_vote(inode, dentry,
836 					   (unsigned int) inode->i_nlink);
837 	if (status < 0) {
838 		/* This vote should succeed under all normal
839 		 * circumstances. */
840 		mlog_errno(status);
841 		goto leave;
842 	}
843 
	/* last link gone: get the orphan dir slot ready before the
	 * transaction is started */
844 	if (!inode->i_nlink) {
845 		status = ocfs2_prepare_orphan_dir(osb, handle, inode,
846 						  orphan_name,
847 						  &orphan_entry_bh);
848 		if (status < 0) {
849 			mlog_errno(status);
850 			goto leave;
851 		}
852 	}
853 
	/* on failure the handle is cleared so "leave" does not commit it */
854 	handle = ocfs2_start_trans(osb, handle, OCFS2_UNLINK_CREDITS);
855 	if (IS_ERR(handle)) {
856 		status = PTR_ERR(handle);
857 		handle = NULL;
858 		mlog_errno(status);
859 		goto leave;
860 	}
861 
862 	status = ocfs2_journal_access(handle, inode, fe_bh,
863 				      OCFS2_JOURNAL_ACCESS_WRITE);
864 	if (status < 0) {
865 		mlog_errno(status);
866 		goto leave;
867 	}
868 
869 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
870 
871 	if (!inode->i_nlink) {
872 		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
873 					  orphan_entry_bh);
874 		if (status < 0) {
875 			mlog_errno(status);
876 			goto leave;
877 		}
878 	}
879 
880 	/* delete the name from the parent dir */
881 	status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh);
882 	if (status < 0) {
883 		mlog_errno(status);
884 		goto leave;
885 	}
886 
887 	/* We can set nlink on the dinode now. clear the saved version
888 	 * so that it doesn't get set later. */
889 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
890 	saved_nlink = 0;
891 
892 	status = ocfs2_journal_dirty(handle, fe_bh);
893 	if (status < 0) {
894 		mlog_errno(status);
895 		goto leave;
896 	}
897 
	/* a removed subdirectory takes its ".." link to the parent with
	 * it; undo the in-core decrement if the parent cannot be written */
898 	if (S_ISDIR(inode->i_mode)) {
899 		dir->i_nlink--;
900 		status = ocfs2_mark_inode_dirty(handle, dir,
901 						parent_node_bh);
902 		if (status < 0) {
903 			mlog_errno(status);
904 			dir->i_nlink++;
905 		}
906 	}
907 
908 leave:
	/* saved_nlink is non-zero only while the lowered count has not
	 * yet been written to the dinode */
909 	if (status < 0 && saved_nlink)
910 		inode->i_nlink = saved_nlink;
911 
912 	if (handle)
913 		ocfs2_commit_trans(handle);
914 
915 	if (fe_bh)
916 		brelse(fe_bh);
917 
918 	if (dirent_bh)
919 		brelse(dirent_bh);
920 
921 	if (parent_node_bh)
922 		brelse(parent_node_bh);
923 
924 	if (orphan_entry_bh)
925 		brelse(orphan_entry_bh);
926 
927 	mlog_exit(status);
928 
929 	return status;
930 }
931 
932 /*
933  * The only place this should be used is rename!
934  * if they have the same id, then the 1st one is the only one locked.
935  */
936 static int ocfs2_double_lock(struct ocfs2_super *osb,
937 			     struct ocfs2_journal_handle *handle,
938 			     struct buffer_head **bh1,
939 			     struct inode *inode1,
940 			     struct buffer_head **bh2,
941 			     struct inode *inode2)
942 {
943 	int status;
944 	struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
945 	struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
946 	struct buffer_head **tmpbh;
947 	struct inode *tmpinode;
948 
949 	mlog_entry("(inode1 = %llu, inode2 = %llu)\n",
950 		   (unsigned long long)oi1->ip_blkno,
951 		   (unsigned long long)oi2->ip_blkno);
952 
953 	BUG_ON(!handle);
954 
955 	if (*bh1)
956 		*bh1 = NULL;
957 	if (*bh2)
958 		*bh2 = NULL;
959 
960 	/* we always want to lock the one with the lower lockid first. */
961 	if (oi1->ip_blkno != oi2->ip_blkno) {
962 		if (oi1->ip_blkno < oi2->ip_blkno) {
963 			/* switch id1 and id2 around */
964 			mlog(0, "switching them around...\n");
965 			tmpbh = bh2;
966 			bh2 = bh1;
967 			bh1 = tmpbh;
968 
969 			tmpinode = inode2;
970 			inode2 = inode1;
971 			inode1 = tmpinode;
972 		}
973 		/* lock id2 */
974 		status = ocfs2_meta_lock(inode2, handle, bh2, 1);
975 		if (status < 0) {
976 			if (status != -ENOENT)
977 				mlog_errno(status);
978 			goto bail;
979 		}
980 	}
981 	/* lock id1 */
982 	status = ocfs2_meta_lock(inode1, handle, bh1, 1);
983 	if (status < 0) {
984 		if (status != -ENOENT)
985 			mlog_errno(status);
986 		goto bail;
987 	}
988 bail:
989 	mlog_exit(status);
990 	return status;
991 }
992 
/*
 * PARENT_INO() - the 'inode' member of the second entry in a directory
 * block.
 *
 * @buffer points at the first ocfs2_dir_entry of the block.  The macro
 * steps over that entry using its little-endian rec_len and yields the
 * inode field of the entry that follows.  ocfs2_fill_new_dir() writes "."
 * first and ".." second, so for a directory's first block this is the
 * parent's block number.  The value is still in on-disk byte order;
 * callers convert it with le64_to_cpu().
 *
 * @buffer is parenthesised so that any pointer expression can be passed,
 * and the whole expansion is parenthesised so it composes safely with
 * surrounding operators.  Note that @buffer is evaluated twice.
 */
#define PARENT_INO(buffer) \
	(((struct ocfs2_dir_entry *) \
	  ((char *)(buffer) + \
	   le16_to_cpu(((struct ocfs2_dir_entry *)(buffer))->rec_len)))->inode)
997 
998 static int ocfs2_rename(struct inode *old_dir,
999 			struct dentry *old_dentry,
1000 			struct inode *new_dir,
1001 			struct dentry *new_dentry)
1002 {
1003 	int status = 0, rename_lock = 0;
1004 	struct inode *old_inode = old_dentry->d_inode;
1005 	struct inode *new_inode = new_dentry->d_inode;
1006 	struct ocfs2_dinode *newfe = NULL;
1007 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
1008 	struct buffer_head *orphan_entry_bh = NULL;
1009 	struct buffer_head *newfe_bh = NULL;
1010 	struct buffer_head *insert_entry_bh = NULL;
1011 	struct ocfs2_super *osb = NULL;
1012 	u64 newfe_blkno;
1013 	struct ocfs2_journal_handle *handle = NULL;
1014 	struct buffer_head *old_dir_bh = NULL;
1015 	struct buffer_head *new_dir_bh = NULL;
1016 	struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry
1017 							       // and new_dentry
1018 	struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
1019 	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
1020 						    // this is the 1st dirent bh
1021 	nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
1022 	unsigned int links_count;
1023 
1024 	/* At some point it might be nice to break this function up a
1025 	 * bit. */
1026 
1027 	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.*s' to='%.*s')\n",
1028 		   old_dir, old_dentry, new_dir, new_dentry,
1029 		   old_dentry->d_name.len, old_dentry->d_name.name,
1030 		   new_dentry->d_name.len, new_dentry->d_name.name);
1031 
1032 	osb = OCFS2_SB(old_dir->i_sb);
1033 
1034 	if (new_inode) {
1035 		if (!igrab(new_inode))
1036 			BUG();
1037 	}
1038 
1039 	if (atomic_read(&old_dentry->d_count) > 2) {
1040 		shrink_dcache_parent(old_dentry);
1041 		if (atomic_read(&old_dentry->d_count) > 2) {
1042 			status = -EBUSY;
1043 			goto bail;
1044 		}
1045 	}
1046 
1047 	/* Assume a directory heirarchy thusly:
1048 	 * a/b/c
1049 	 * a/d
1050 	 * a,b,c, and d are all directories.
1051 	 *
1052 	 * from cwd of 'a' on both nodes:
1053 	 * node1: mv b/c d
1054 	 * node2: mv d   b/c
1055 	 *
1056 	 * And that's why, just like the VFS, we need a file system
1057 	 * rename lock. */
1058 	if (old_dentry != new_dentry) {
1059 		status = ocfs2_rename_lock(osb);
1060 		if (status < 0) {
1061 			mlog_errno(status);
1062 			goto bail;
1063 		}
1064 		rename_lock = 1;
1065 	}
1066 
1067 	handle = ocfs2_alloc_handle(osb);
1068 	if (handle == NULL) {
1069 		status = -ENOMEM;
1070 		mlog_errno(status);
1071 		goto bail;
1072 	}
1073 
1074 	/* if old and new are the same, this'll just do one lock. */
1075 	status = ocfs2_double_lock(osb, handle,
1076 				  &old_dir_bh, old_dir,
1077 				  &new_dir_bh, new_dir);
1078 	if (status < 0) {
1079 		mlog_errno(status);
1080 		goto bail;
1081 	}
1082 
1083 	/* make sure both dirs have bhs
1084 	 * get an extra ref on old_dir_bh if old==new */
1085 	if (!new_dir_bh) {
1086 		if (old_dir_bh) {
1087 			new_dir_bh = old_dir_bh;
1088 			get_bh(new_dir_bh);
1089 		} else {
1090 			mlog(ML_ERROR, "no old_dir_bh!\n");
1091 			status = -EIO;
1092 			goto bail;
1093 		}
1094 	}
1095 
1096 	if (S_ISDIR(old_inode->i_mode)) {
1097 		/* Directories actually require metadata updates to
1098 		 * the directory info so we can't get away with not
1099 		 * doing node locking on it. */
1100 		status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
1101 		if (status < 0) {
1102 			if (status != -ENOENT)
1103 				mlog_errno(status);
1104 			goto bail;
1105 		}
1106 
1107 		status = ocfs2_request_rename_vote(old_inode, old_dentry);
1108 		if (status < 0) {
1109 			mlog_errno(status);
1110 			goto bail;
1111 		}
1112 
1113 		status = -EIO;
1114 		old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
1115 		if (!old_inode_de_bh)
1116 			goto bail;
1117 
1118 		status = -EIO;
1119 		if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) !=
1120 		    OCFS2_I(old_dir)->ip_blkno)
1121 			goto bail;
1122 		status = -EMLINK;
1123 		if (!new_inode && new_dir!=old_dir &&
1124 		    new_dir->i_nlink >= OCFS2_LINK_MAX)
1125 			goto bail;
1126 	} else {
1127 		/* Ah, the simple case - we're a file so just send a
1128 		 * message. */
1129 		status = ocfs2_request_rename_vote(old_inode, old_dentry);
1130 		if (status < 0) {
1131 			mlog_errno(status);
1132 			goto bail;
1133 		}
1134 	}
1135 
1136 	status = -ENOENT;
1137 	old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
1138 				     old_dentry->d_name.len,
1139 				     old_dir, &old_de);
1140 	if (!old_de_bh)
1141 		goto bail;
1142 
1143 	/*
1144 	 *  Check for inode number is _not_ due to possible IO errors.
1145 	 *  We might rmdir the source, keep it as pwd of some process
1146 	 *  and merrily kill the link to whatever was created under the
1147 	 *  same name. Goodbye sticky bit ;-<
1148 	 */
1149 	if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno)
1150 		goto bail;
1151 
1152 	/* check if the target already exists (in which case we need
1153 	 * to delete it */
1154 	status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
1155 					  new_dentry->d_name.len,
1156 					  &newfe_blkno, new_dir, &new_de_bh,
1157 					  &new_de);
1158 	/* The only error we allow here is -ENOENT because the new
1159 	 * file not existing is perfectly valid. */
1160 	if ((status < 0) && (status != -ENOENT)) {
1161 		/* If we cannot find the file specified we should just */
1162 		/* return the error... */
1163 		mlog_errno(status);
1164 		goto bail;
1165 	}
1166 
1167 	if (!new_de && new_inode)
1168 		mlog(ML_ERROR, "inode %lu does not exist in it's parent "
1169 		     "directory!", new_inode->i_ino);
1170 
1171 	/* In case we need to overwrite an existing file, we blow it
1172 	 * away first */
1173 	if (new_de) {
1174 		/* VFS didn't think there existed an inode here, but
1175 		 * someone else in the cluster must have raced our
1176 		 * rename to create one. Today we error cleanly, in
1177 		 * the future we should consider calling iget to build
1178 		 * a new struct inode for this entry. */
1179 		if (!new_inode) {
1180 			status = -EACCES;
1181 
1182 			mlog(0, "We found an inode for name %.*s but VFS "
1183 			     "didn't give us one.\n", new_dentry->d_name.len,
1184 			     new_dentry->d_name.name);
1185 			goto bail;
1186 		}
1187 
1188 		if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
1189 			status = -EACCES;
1190 
1191 			mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n",
1192 			     (unsigned long long)OCFS2_I(new_inode)->ip_blkno,
1193 			     (unsigned long long)newfe_blkno,
1194 			     OCFS2_I(new_inode)->ip_flags);
1195 			goto bail;
1196 		}
1197 
1198 		status = ocfs2_meta_lock(new_inode, handle, &newfe_bh, 1);
1199 		if (status < 0) {
1200 			if (status != -ENOENT)
1201 				mlog_errno(status);
1202 			goto bail;
1203 		}
1204 
1205 		if (S_ISDIR(new_inode->i_mode))
1206 			links_count = 0;
1207 		else
1208 			links_count = (unsigned int) (new_inode->i_nlink - 1);
1209 
1210 		status = ocfs2_request_unlink_vote(new_inode, new_dentry,
1211 						   links_count);
1212 		if (status < 0) {
1213 			mlog_errno(status);
1214 			goto bail;
1215 		}
1216 
1217 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
1218 
1219 		mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
1220 		     "newfebh=%p bhblocknr=%llu\n", new_de,
1221 		     (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
1222 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
1223 
1224 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
1225 			status = ocfs2_prepare_orphan_dir(osb, handle,
1226 							  new_inode,
1227 							  orphan_name,
1228 							  &orphan_entry_bh);
1229 			if (status < 0) {
1230 				mlog_errno(status);
1231 				goto bail;
1232 			}
1233 		}
1234 	} else {
1235 		BUG_ON(new_dentry->d_parent->d_inode != new_dir);
1236 
1237 		status = ocfs2_check_dir_for_entry(new_dir,
1238 						   new_dentry->d_name.name,
1239 						   new_dentry->d_name.len);
1240 		if (status)
1241 			goto bail;
1242 
1243 		status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
1244 						      new_dentry->d_name.name,
1245 						      new_dentry->d_name.len,
1246 						      &insert_entry_bh);
1247 		if (status < 0) {
1248 			mlog_errno(status);
1249 			goto bail;
1250 		}
1251 	}
1252 
1253 	handle = ocfs2_start_trans(osb, handle, OCFS2_RENAME_CREDITS);
1254 	if (IS_ERR(handle)) {
1255 		status = PTR_ERR(handle);
1256 		handle = NULL;
1257 		mlog_errno(status);
1258 		goto bail;
1259 	}
1260 
1261 	if (new_de) {
1262 		if (S_ISDIR(new_inode->i_mode)) {
1263 			if (!ocfs2_empty_dir(new_inode) ||
1264 			    new_inode->i_nlink != 2) {
1265 				status = -ENOTEMPTY;
1266 				goto bail;
1267 			}
1268 		}
1269 		status = ocfs2_journal_access(handle, new_inode, newfe_bh,
1270 					      OCFS2_JOURNAL_ACCESS_WRITE);
1271 		if (status < 0) {
1272 			mlog_errno(status);
1273 			goto bail;
1274 		}
1275 
1276 		if (S_ISDIR(new_inode->i_mode) ||
1277 		    (newfe->i_links_count == cpu_to_le16(1))){
1278 			status = ocfs2_orphan_add(osb, handle, new_inode,
1279 						  newfe, orphan_name,
1280 						  orphan_entry_bh);
1281 			if (status < 0) {
1282 				mlog_errno(status);
1283 				goto bail;
1284 			}
1285 		}
1286 
1287 		/* change the dirent to point to the correct inode */
1288 		status = ocfs2_journal_access(handle, new_dir, new_de_bh,
1289 					      OCFS2_JOURNAL_ACCESS_WRITE);
1290 		if (status < 0) {
1291 			mlog_errno(status);
1292 			goto bail;
1293 		}
1294 		new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno);
1295 		new_de->file_type = old_de->file_type;
1296 		new_dir->i_version++;
1297 		status = ocfs2_journal_dirty(handle, new_de_bh);
1298 		if (status < 0) {
1299 			mlog_errno(status);
1300 			goto bail;
1301 		}
1302 
1303 		if (S_ISDIR(new_inode->i_mode))
1304 			newfe->i_links_count = 0;
1305 		else
1306 			le16_add_cpu(&newfe->i_links_count, -1);
1307 
1308 		status = ocfs2_journal_dirty(handle, newfe_bh);
1309 		if (status < 0) {
1310 			mlog_errno(status);
1311 			goto bail;
1312 		}
1313 	} else {
1314 		/* if the name was not found in new_dir, add it now */
1315 		status = ocfs2_add_entry(handle, new_dentry, old_inode,
1316 					 OCFS2_I(old_inode)->ip_blkno,
1317 					 new_dir_bh, insert_entry_bh);
1318 	}
1319 
1320 	old_inode->i_ctime = CURRENT_TIME;
1321 	mark_inode_dirty(old_inode);
1322 
1323 	/* now that the name has been added to new_dir, remove the old name */
1324 	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
1325 	if (status < 0) {
1326 		mlog_errno(status);
1327 		goto bail;
1328 	}
1329 
1330 	if (new_inode) {
1331 		new_inode->i_nlink--;
1332 		new_inode->i_ctime = CURRENT_TIME;
1333 	}
1334 	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1335 	if (old_inode_de_bh) {
1336 		status = ocfs2_journal_access(handle, old_inode,
1337 					     old_inode_de_bh,
1338 					     OCFS2_JOURNAL_ACCESS_WRITE);
1339 		PARENT_INO(old_inode_de_bh->b_data) =
1340 			cpu_to_le64(OCFS2_I(new_dir)->ip_blkno);
1341 		status = ocfs2_journal_dirty(handle, old_inode_de_bh);
1342 		old_dir->i_nlink--;
1343 		if (new_inode) {
1344 			new_inode->i_nlink--;
1345 		} else {
1346 			new_dir->i_nlink++;
1347 			mark_inode_dirty(new_dir);
1348 		}
1349 	}
1350 	mark_inode_dirty(old_dir);
1351 	if (new_inode)
1352 		mark_inode_dirty(new_inode);
1353 
1354 	if (old_dir != new_dir)
1355 		if (new_dir_nlink != new_dir->i_nlink) {
1356 			if (!new_dir_bh) {
1357 				mlog(ML_ERROR, "need to change nlink for new "
1358 				     "dir %llu from %d to %d but bh is NULL\n",
1359 				     (unsigned long long)OCFS2_I(new_dir)->ip_blkno,
1360 				     (int)new_dir_nlink, new_dir->i_nlink);
1361 			} else {
1362 				struct ocfs2_dinode *fe;
1363 				status = ocfs2_journal_access(handle,
1364 							      new_dir,
1365 							      new_dir_bh,
1366 							      OCFS2_JOURNAL_ACCESS_WRITE);
1367 				fe = (struct ocfs2_dinode *) new_dir_bh->b_data;
1368 				fe->i_links_count = cpu_to_le16(new_dir->i_nlink);
1369 				status = ocfs2_journal_dirty(handle, new_dir_bh);
1370 			}
1371 		}
1372 
1373 	if (old_dir_nlink != old_dir->i_nlink) {
1374 		if (!old_dir_bh) {
1375 			mlog(ML_ERROR, "need to change nlink for old dir "
1376 			     "%llu from %d to %d but bh is NULL!\n",
1377 			     (unsigned long long)OCFS2_I(old_dir)->ip_blkno,
1378 			     (int)old_dir_nlink, old_dir->i_nlink);
1379 		} else {
1380 			struct ocfs2_dinode *fe;
1381 			status = ocfs2_journal_access(handle, old_dir,
1382 						      old_dir_bh,
1383 						      OCFS2_JOURNAL_ACCESS_WRITE);
1384 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1385 			fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
1386 			status = ocfs2_journal_dirty(handle, old_dir_bh);
1387 		}
1388 	}
1389 
1390 	status = 0;
1391 bail:
1392 	if (rename_lock)
1393 		ocfs2_rename_unlock(osb);
1394 
1395 	if (handle)
1396 		ocfs2_commit_trans(handle);
1397 
1398 	if (new_inode)
1399 		sync_mapping_buffers(old_inode->i_mapping);
1400 
1401 	if (new_inode)
1402 		iput(new_inode);
1403 	if (newfe_bh)
1404 		brelse(newfe_bh);
1405 	if (old_dir_bh)
1406 		brelse(old_dir_bh);
1407 	if (new_dir_bh)
1408 		brelse(new_dir_bh);
1409 	if (new_de_bh)
1410 		brelse(new_de_bh);
1411 	if (old_de_bh)
1412 		brelse(old_de_bh);
1413 	if (old_inode_de_bh)
1414 		brelse(old_inode_de_bh);
1415 	if (orphan_entry_bh)
1416 		brelse(orphan_entry_bh);
1417 	if (insert_entry_bh)
1418 		brelse(insert_entry_bh);
1419 
1420 	mlog_exit(status);
1421 
1422 	return status;
1423 }
1424 
1425 /*
1426  * we expect i_size = strlen(symname). Copy symname into the file
1427  * data, including the null terminator.
1428  */
1429 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1430 				     struct ocfs2_journal_handle *handle,
1431 				     struct inode *inode,
1432 				     const char *symname)
1433 {
1434 	struct buffer_head **bhs = NULL;
1435 	const char *c;
1436 	struct super_block *sb = osb->sb;
1437 	u64 p_blkno;
1438 	int p_blocks;
1439 	int virtual, blocks, status, i, bytes_left;
1440 
1441 	bytes_left = i_size_read(inode) + 1;
1442 	/* we can't trust i_blocks because we're actually going to
1443 	 * write i_size + 1 bytes. */
1444 	blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
1445 
1446 	mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n",
1447 			(unsigned long long)inode->i_blocks,
1448 			i_size_read(inode), blocks);
1449 
1450 	/* Sanity check -- make sure we're going to fit. */
1451 	if (bytes_left >
1452 	    ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) {
1453 		status = -EIO;
1454 		mlog_errno(status);
1455 		goto bail;
1456 	}
1457 
1458 	bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL);
1459 	if (!bhs) {
1460 		status = -ENOMEM;
1461 		mlog_errno(status);
1462 		goto bail;
1463 	}
1464 
1465 	status = ocfs2_extent_map_get_blocks(inode, 0, 1, &p_blkno,
1466 					     &p_blocks);
1467 	if (status < 0) {
1468 		mlog_errno(status);
1469 		goto bail;
1470 	}
1471 
1472 	/* links can never be larger than one cluster so we know this
1473 	 * is all going to be contiguous, but do a sanity check
1474 	 * anyway. */
1475 	if ((p_blocks << sb->s_blocksize_bits) < bytes_left) {
1476 		status = -EIO;
1477 		mlog_errno(status);
1478 		goto bail;
1479 	}
1480 
1481 	virtual = 0;
1482 	while(bytes_left > 0) {
1483 		c = &symname[virtual * sb->s_blocksize];
1484 
1485 		bhs[virtual] = sb_getblk(sb, p_blkno);
1486 		if (!bhs[virtual]) {
1487 			status = -ENOMEM;
1488 			mlog_errno(status);
1489 			goto bail;
1490 		}
1491 		ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]);
1492 
1493 		status = ocfs2_journal_access(handle, inode, bhs[virtual],
1494 					      OCFS2_JOURNAL_ACCESS_CREATE);
1495 		if (status < 0) {
1496 			mlog_errno(status);
1497 			goto bail;
1498 		}
1499 
1500 		memset(bhs[virtual]->b_data, 0, sb->s_blocksize);
1501 
1502 		memcpy(bhs[virtual]->b_data, c,
1503 		       (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
1504 		       bytes_left);
1505 
1506 		status = ocfs2_journal_dirty(handle, bhs[virtual]);
1507 		if (status < 0) {
1508 			mlog_errno(status);
1509 			goto bail;
1510 		}
1511 
1512 		virtual++;
1513 		p_blkno++;
1514 		bytes_left -= sb->s_blocksize;
1515 	}
1516 
1517 	status = 0;
1518 bail:
1519 
1520 	if (bhs) {
1521 		for(i = 0; i < blocks; i++)
1522 			if (bhs[i])
1523 				brelse(bhs[i]);
1524 		kfree(bhs);
1525 	}
1526 
1527 	mlog_exit(status);
1528 	return status;
1529 }
1530 
/*
 * Create the symbolic link named by dentry inside dir, pointing at symname.
 *
 * The target length including its NUL terminator (l) selects the storage:
 * when l does not exceed ocfs2_fast_symlink_chars(sb) the string is copied
 * straight into the new dinode (fe->id2.i_symlink); otherwise one cluster
 * is reserved and allocated and the string is written through
 * ocfs2_create_symlink_data().
 *
 * Returns a negative errno on failure, in which case any inode obtained
 * from ocfs2_mknod_locked() is released with iput(); on success the
 * (non-negative) status of ocfs2_add_entry() is returned.
 */
1531 static int ocfs2_symlink(struct inode *dir,
1532 			 struct dentry *dentry,
1533 			 const char *symname)
1534 {
1535 	int status, l, credits;
1536 	u64 newsize;
1537 	struct ocfs2_super *osb = NULL;
1538 	struct inode *inode = NULL;
1539 	struct super_block *sb;
1540 	struct buffer_head *new_fe_bh = NULL;
1541 	struct buffer_head *de_bh = NULL;
1542 	struct buffer_head *parent_fe_bh = NULL;
1543 	struct ocfs2_dinode *fe = NULL;
1544 	struct ocfs2_dinode *dirfe;
1545 	struct ocfs2_journal_handle *handle = NULL;
1546 	struct ocfs2_alloc_context *inode_ac = NULL;
1547 	struct ocfs2_alloc_context *data_ac = NULL;
1548 
1549 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1550 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
1551 
1552 	sb = dir->i_sb;
1553 	osb = OCFS2_SB(sb);
1554 
	/* l counts the terminating NUL; i_size is later set to l - 1. */
1555 	l = strlen(symname) + 1;
1556 
1557 	credits = ocfs2_calc_symlink_credits(sb);
1558 
1559 	handle = ocfs2_alloc_handle(osb);
1560 	if (handle == NULL) {
1561 		status = -ENOMEM;
1562 		mlog_errno(status);
1563 		goto bail;
1564 	}
1565 
1566 	/* lock the parent directory */
1567 	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
1568 	if (status < 0) {
1569 		if (status != -ENOENT)
1570 			mlog_errno(status);
1571 		goto bail;
1572 	}
1573 
1574 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
1575 	if (!dirfe->i_links_count) {
1576 		/* can't make a file in a deleted directory. */
1577 		status = -ENOENT;
1578 		goto bail;
1579 	}
1580 
	/* A non-zero result is returned to the caller without being logged. */
1581 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
1582 					   dentry->d_name.len);
1583 	if (status)
1584 		goto bail;
1585 
	/* de_bh is handed to ocfs2_add_entry() further down. */
1586 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
1587 					      dentry->d_name.name,
1588 					      dentry->d_name.len, &de_bh);
1589 	if (status < 0) {
1590 		mlog_errno(status);
1591 		goto bail;
1592 	}
1593 
1594 	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
1595 	if (status < 0) {
1596 		if (status != -ENOSPC)
1597 			mlog_errno(status);
1598 		goto bail;
1599 	}
1600 
1601 	/* don't reserve bitmap space for fast symlinks. */
1602 	if (l > ocfs2_fast_symlink_chars(sb)) {
1603 		status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
1604 		if (status < 0) {
1605 			if (status != -ENOSPC)
1606 				mlog_errno(status);
1607 			goto bail;
1608 		}
1609 	}
1610 
	/* Every reservation above is made before the transaction starts.
	 * On failure handle is reset to NULL so bail does not commit it. */
1611 	handle = ocfs2_start_trans(osb, handle, credits);
1612 	if (IS_ERR(handle)) {
1613 		status = PTR_ERR(handle);
1614 		handle = NULL;
1615 		mlog_errno(status);
1616 		goto bail;
1617 	}
1618 
1619 	status = ocfs2_mknod_locked(osb, dir, dentry,
1620 				    S_IFLNK | S_IRWXUGO, 0,
1621 				    &new_fe_bh, parent_fe_bh, handle,
1622 				    &inode, inode_ac);
1623 	if (status < 0) {
1624 		mlog_errno(status);
1625 		goto bail;
1626 	}
1627 
1628 	fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
1629 	inode->i_rdev = 0;
1630 	newsize = l - 1;
	/* Same threshold as the cluster reservation above. */
1631 	if (l > ocfs2_fast_symlink_chars(sb)) {
1632 		inode->i_op = &ocfs2_symlink_inode_operations;
1633 		status = ocfs2_do_extend_allocation(osb, inode, 1, new_fe_bh,
1634 						    handle, data_ac, NULL,
1635 						    NULL);
1636 		if (status < 0) {
			/* Errors other than -ENOSPC/-EINTR are logged and
			 * then reported to the caller as -ENOSPC. */
1637 			if (status != -ENOSPC && status != -EINTR) {
1638 				mlog(ML_ERROR,
1639 				     "Failed to extend file to %llu\n",
1640 				     (unsigned long long)newsize);
1641 				mlog_errno(status);
1642 				status = -ENOSPC;
1643 			}
1644 			goto bail;
1645 		}
1646 		i_size_write(inode, newsize);
1647 		inode->i_blocks = ocfs2_align_bytes_to_sectors(newsize);
1648 	} else {
		/* Fast symlink: l bytes (NUL included) go into the dinode. */
1649 		inode->i_op = &ocfs2_fast_symlink_inode_operations;
1650 		memcpy((char *) fe->id2.i_symlink, symname, l);
1651 		i_size_write(inode, newsize);
1652 		inode->i_blocks = 0;
1653 	}
1654 
1655 	status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh);
1656 	if (status < 0) {
1657 		mlog_errno(status);
1658 		goto bail;
1659 	}
1660 
1661 	if (!ocfs2_inode_is_fast_symlink(inode)) {
1662 		status = ocfs2_create_symlink_data(osb, handle, inode,
1663 						   symname);
1664 		if (status < 0) {
1665 			mlog_errno(status);
1666 			goto bail;
1667 		}
1668 	}
1669 
1670 	status = ocfs2_add_entry(handle, dentry, inode,
1671 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1672 				 de_bh);
1673 	if (status < 0) {
1674 		mlog_errno(status);
1675 		goto bail;
1676 	}
1677 
1678 	insert_inode_hash(inode);
1679 	dentry->d_op = &ocfs2_dentry_ops;
1680 	d_instantiate(dentry, inode);
	/* Shared exit path, also taken on success: the inode reference is
	 * dropped only when status < 0. */
1681 bail:
1682 	if (handle)
1683 		ocfs2_commit_trans(handle);
1684 	if (new_fe_bh)
1685 		brelse(new_fe_bh);
1686 	if (parent_fe_bh)
1687 		brelse(parent_fe_bh);
1688 	if (de_bh)
1689 		brelse(de_bh);
1690 	if (inode_ac)
1691 		ocfs2_free_alloc_context(inode_ac);
1692 	if (data_ac)
1693 		ocfs2_free_alloc_context(data_ac);
1694 	if ((status < 0) && inode)
1695 		iput(inode);
1696 
1697 	mlog_exit(status);
1698 
1699 	return status;
1700 }
1701 
1702 int ocfs2_check_dir_entry(struct inode * dir,
1703 			  struct ocfs2_dir_entry * de,
1704 			  struct buffer_head * bh,
1705 			  unsigned long offset)
1706 {
1707 	const char *error_msg = NULL;
1708 	const int rlen = le16_to_cpu(de->rec_len);
1709 
1710 	if (rlen < OCFS2_DIR_REC_LEN(1))
1711 		error_msg = "rec_len is smaller than minimal";
1712 	else if (rlen % 4 != 0)
1713 		error_msg = "rec_len % 4 != 0";
1714 	else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
1715 		error_msg = "rec_len is too small for name_len";
1716 	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
1717 		error_msg = "directory entry across blocks";
1718 
1719 	if (error_msg != NULL)
1720 		mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
1721 		     "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
1722 		     (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
1723 		     offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
1724 		     de->name_len);
1725 	return error_msg == NULL ? 1 : 0;
1726 }
1727 
1728 /* we don't always have a dentry for what we want to add, so people
1729  * like orphan dir can call this instead.
1730  *
1731  * If you pass me insert_bh, I'll skip the search of the other dir
1732  * blocks and put the record in there.
1733  */
1734 static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
1735 			     struct inode *dir,
1736 			     const char *name, int namelen,
1737 			     struct inode *inode, u64 blkno,
1738 			     struct buffer_head *parent_fe_bh,
1739 			     struct buffer_head *insert_bh)
1740 {
1741 	unsigned long offset;
1742 	unsigned short rec_len;
1743 	struct ocfs2_dir_entry *de, *de1;
1744 	struct super_block *sb;
1745 	int retval, status;
1746 
1747 	mlog_entry_void();
1748 
1749 	sb = dir->i_sb;
1750 
1751 	if (!namelen)
1752 		return -EINVAL;
1753 
1754 	rec_len = OCFS2_DIR_REC_LEN(namelen);
1755 	offset = 0;
1756 	de = (struct ocfs2_dir_entry *) insert_bh->b_data;
1757 	while (1) {
1758 		BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data);
1759 		/* These checks should've already been passed by the
1760 		 * prepare function, but I guess we can leave them
1761 		 * here anyway. */
1762 		if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
1763 			retval = -ENOENT;
1764 			goto bail;
1765 		}
1766 		if (ocfs2_match(namelen, name, de)) {
1767 			retval = -EEXIST;
1768 			goto bail;
1769 		}
1770 		if (((le64_to_cpu(de->inode) == 0) &&
1771 		     (le16_to_cpu(de->rec_len) >= rec_len)) ||
1772 		    (le16_to_cpu(de->rec_len) >=
1773 		     (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
1774 			status = ocfs2_journal_access(handle, dir, insert_bh,
1775 						      OCFS2_JOURNAL_ACCESS_WRITE);
1776 			/* By now the buffer is marked for journaling */
1777 			offset += le16_to_cpu(de->rec_len);
1778 			if (le64_to_cpu(de->inode)) {
1779 				de1 = (struct ocfs2_dir_entry *)((char *) de +
1780 					OCFS2_DIR_REC_LEN(de->name_len));
1781 				de1->rec_len =
1782 					cpu_to_le16(le16_to_cpu(de->rec_len) -
1783 					OCFS2_DIR_REC_LEN(de->name_len));
1784 				de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1785 				de = de1;
1786 			}
1787 			de->file_type = OCFS2_FT_UNKNOWN;
1788 			if (blkno) {
1789 				de->inode = cpu_to_le64(blkno);
1790 				ocfs2_set_de_type(de, inode->i_mode);
1791 			} else
1792 				de->inode = 0;
1793 			de->name_len = namelen;
1794 			memcpy(de->name, name, namelen);
1795 
1796 			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1797 			dir->i_version++;
1798 			status = ocfs2_journal_dirty(handle, insert_bh);
1799 			retval = 0;
1800 			goto bail;
1801 		}
1802 		offset += le16_to_cpu(de->rec_len);
1803 		de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
1804 	}
1805 
1806 	/* when you think about it, the assert above should prevent us
1807 	 * from ever getting here. */
1808 	retval = -ENOSPC;
1809 bail:
1810 
1811 	mlog_exit(retval);
1812 	return retval;
1813 }
1814 
1815 
1816 /*
1817  * ocfs2_delete_entry deletes a directory entry by merging it with the
1818  * previous entry
1819  */
1820 static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
1821 			      struct inode *dir,
1822 			      struct ocfs2_dir_entry *de_del,
1823 			      struct buffer_head *bh)
1824 {
1825 	struct ocfs2_dir_entry *de, *pde;
1826 	int i, status = -ENOENT;
1827 
1828 	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
1829 
1830 	i = 0;
1831 	pde = NULL;
1832 	de = (struct ocfs2_dir_entry *) bh->b_data;
1833 	while (i < bh->b_size) {
1834 		if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
1835 			status = -EIO;
1836 			mlog_errno(status);
1837 			goto bail;
1838 		}
1839 		if (de == de_del)  {
1840 			status = ocfs2_journal_access(handle, dir, bh,
1841 						      OCFS2_JOURNAL_ACCESS_WRITE);
1842 			if (status < 0) {
1843 				status = -EIO;
1844 				mlog_errno(status);
1845 				goto bail;
1846 			}
1847 			if (pde)
1848 				pde->rec_len =
1849 					cpu_to_le16(le16_to_cpu(pde->rec_len) +
1850 						    le16_to_cpu(de->rec_len));
1851 			else
1852 				de->inode = 0;
1853 			dir->i_version++;
1854 			status = ocfs2_journal_dirty(handle, bh);
1855 			goto bail;
1856 		}
1857 		i += le16_to_cpu(de->rec_len);
1858 		pde = de;
1859 		de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1860 	}
1861 bail:
1862 	mlog_exit(status);
1863 	return status;
1864 }
1865 
1866 /*
1867  * Returns 0 if not found, -1 on failure, and 1 on success
1868  */
1869 static int inline ocfs2_search_dirblock(struct buffer_head *bh,
1870 					struct inode *dir,
1871 					const char *name, int namelen,
1872 					unsigned long offset,
1873 					struct ocfs2_dir_entry **res_dir)
1874 {
1875 	struct ocfs2_dir_entry *de;
1876 	char *dlimit, *de_buf;
1877 	int de_len;
1878 	int ret = 0;
1879 
1880 	mlog_entry_void();
1881 
1882 	de_buf = bh->b_data;
1883 	dlimit = de_buf + dir->i_sb->s_blocksize;
1884 
1885 	while (de_buf < dlimit) {
1886 		/* this code is executed quadratically often */
1887 		/* do minimal checking `by hand' */
1888 
1889 		de = (struct ocfs2_dir_entry *) de_buf;
1890 
1891 		if (de_buf + namelen <= dlimit &&
1892 		    ocfs2_match(namelen, name, de)) {
1893 			/* found a match - just to be sure, do a full check */
1894 			if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
1895 				ret = -1;
1896 				goto bail;
1897 			}
1898 			*res_dir = de;
1899 			ret = 1;
1900 			goto bail;
1901 		}
1902 
1903 		/* prevent looping on a bad block */
1904 		de_len = le16_to_cpu(de->rec_len);
1905 		if (de_len <= 0) {
1906 			ret = -1;
1907 			goto bail;
1908 		}
1909 
1910 		de_buf += de_len;
1911 		offset += de_len;
1912 	}
1913 
1914 bail:
1915 	mlog_exit(ret);
1916 	return ret;
1917 }
1918 
/*
 * Look up name/namelen in directory dir.
 *
 * On a hit, returns the buffer_head of the directory block holding the
 * entry and points *res_dir at the entry inside that block; the callers in
 * this file release that buffer with brelse() when they are done.  Returns
 * NULL (with *res_dir left NULL) when no entry matched or when
 * ocfs2_search_dirblock() reported a bad block.
 *
 * The scan starts at OCFS2_I(dir)->ip_dir_start_lookup, which is updated
 * to the block of each successful lookup, and wraps around to block 0.
 * Blocks are fetched in batches of up to NAMEI_RA_SIZE into bh_use[].
 */
1919 struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
1920 				     struct inode *dir,
1921 				     struct ocfs2_dir_entry **res_dir)
1922 {
1923 	struct super_block *sb;
1924 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
1925 	struct buffer_head *bh, *ret = NULL;
1926 	unsigned long start, block, b;
1927 	int ra_max = 0;		/* Number of bh's in the readahead
1928 				   buffer, bh_use[] */
1929 	int ra_ptr = 0;		/* Current index into readahead
1930 				   buffer */
1931 	int num = 0;
1932 	int nblocks, i, err;
1933 
1934 	mlog_entry_void();
1935 
1936 	*res_dir = NULL;
1937 	sb = dir->i_sb;
1938 
1939 	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
1940 	start = OCFS2_I(dir)->ip_dir_start_lookup;
	/* A remembered start block beyond the current size falls back to 0. */
1941 	if (start >= nblocks)
1942 		start = 0;
1943 	block = start;
1944 
1945 restart:
1946 	do {
1947 		/*
1948 		 * We deal with the read-ahead logic here.
1949 		 */
1950 		if (ra_ptr >= ra_max) {
1951 			/* Refill the readahead buffer */
1952 			ra_ptr = 0;
1953 			b = block;
1954 			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
1955 				/*
1956 				 * Terminate if we reach the end of the
1957 				 * directory and must wrap, or if our
1958 				 * search has finished at this block.
1959 				 */
1960 				if (b >= nblocks || (num && block == start)) {
1961 					bh_use[ra_max] = NULL;
1962 					break;
1963 				}
1964 				num++;
1965 
				/* A NULL result is stored as-is and skipped
				 * by the consumer below; err is never
				 * examined.  NOTE(review): the trailing 1 is
				 * presumably a read-ahead request - confirm
				 * against ocfs2_bread(). */
1966 				bh = ocfs2_bread(dir, b++, &err, 1);
1967 				bh_use[ra_max] = bh;
1968 			}
1969 		}
1970 		if ((bh = bh_use[ra_ptr++]) == NULL)
1971 			goto next;
1972 		wait_on_buffer(bh);
1973 		if (!buffer_uptodate(bh)) {
1974 			/* read error, skip block & hope for the best */
			/* NOTE(review): the value printed as "offset" is the
			 * block index, not a byte offset. */
1975 			ocfs2_error(dir->i_sb, "reading directory %llu, "
1976 				    "offset %lu\n",
1977 				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
1978 				    block);
1979 			brelse(bh);
1980 			goto next;
1981 		}
1982 		i = ocfs2_search_dirblock(bh, dir, name, namelen,
1983 					  block << sb->s_blocksize_bits,
1984 					  res_dir);
1985 		if (i == 1) {
			/* Hit: remember the block for the next lookup and
			 * return bh with its reference still held. */
1986 			OCFS2_I(dir)->ip_dir_start_lookup = block;
1987 			ret = bh;
1988 			goto cleanup_and_exit;
1989 		} else {
1990 			brelse(bh);
			/* Bad block: give up with ret still NULL. */
1991 			if (i < 0)
1992 				goto cleanup_and_exit;
1993 		}
1994 	next:
1995 		if (++block >= nblocks)
1996 			block = 0;
1997 	} while (block != start);
1998 
1999 	/*
2000 	 * If the directory has grown while we were searching, then
2001 	 * search the last part of the directory before giving up.
2002 	 */
2003 	block = nblocks;
2004 	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
2005 	if (block < nblocks) {
2006 		start = 0;
2007 		goto restart;
2008 	}
2009 
2010 cleanup_and_exit:
2011 	/* Clean up the read-ahead blocks */
	/* Only buffers fetched but not yet consumed remain in
	 * bh_use[ra_ptr..ra_max). */
2012 	for (; ra_ptr < ra_max; ra_ptr++)
2013 		brelse(bh_use[ra_ptr]);
2014 
2015 	mlog_exit_ptr(ret);
2016 	return ret;
2017 }
2018 
2019 static int ocfs2_blkno_stringify(u64 blkno, char *name)
2020 {
2021 	int status, namelen;
2022 
2023 	mlog_entry_void();
2024 
2025 	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
2026 			   (long long)blkno);
2027 	if (namelen <= 0) {
2028 		if (namelen)
2029 			status = namelen;
2030 		else
2031 			status = -EINVAL;
2032 		mlog_errno(status);
2033 		goto bail;
2034 	}
2035 	if (namelen != OCFS2_ORPHAN_NAMELEN) {
2036 		status = -EINVAL;
2037 		mlog_errno(status);
2038 		goto bail;
2039 	}
2040 
2041 	mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name,
2042 	     namelen);
2043 
2044 	status = 0;
2045 bail:
2046 	mlog_exit(status);
2047 	return status;
2048 }
2049 
2050 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
2051 				    struct ocfs2_journal_handle *handle,
2052 				    struct inode *inode,
2053 				    char *name,
2054 				    struct buffer_head **de_bh)
2055 {
2056 	struct inode *orphan_dir_inode = NULL;
2057 	struct buffer_head *orphan_dir_bh = NULL;
2058 	int status = 0;
2059 
2060 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2061 	if (status < 0) {
2062 		mlog_errno(status);
2063 		goto leave;
2064 	}
2065 
2066 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2067 						       ORPHAN_DIR_SYSTEM_INODE,
2068 						       osb->slot_num);
2069 	if (!orphan_dir_inode) {
2070 		status = -ENOENT;
2071 		mlog_errno(status);
2072 		goto leave;
2073 	}
2074 
2075 	ocfs2_handle_add_inode(handle, orphan_dir_inode);
2076 	status = ocfs2_meta_lock(orphan_dir_inode, handle, &orphan_dir_bh, 1);
2077 	if (status < 0) {
2078 		mlog_errno(status);
2079 		goto leave;
2080 	}
2081 
2082 	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
2083 					      orphan_dir_bh, name,
2084 					      OCFS2_ORPHAN_NAMELEN, de_bh);
2085 	if (status < 0) {
2086 		mlog_errno(status);
2087 		goto leave;
2088 	}
2089 
2090 leave:
2091 	if (orphan_dir_inode)
2092 		iput(orphan_dir_inode);
2093 
2094 	if (orphan_dir_bh)
2095 		brelse(orphan_dir_bh);
2096 
2097 	mlog_exit(status);
2098 	return status;
2099 }
2100 
2101 static int ocfs2_orphan_add(struct ocfs2_super *osb,
2102 			    struct ocfs2_journal_handle *handle,
2103 			    struct inode *inode,
2104 			    struct ocfs2_dinode *fe,
2105 			    char *name,
2106 			    struct buffer_head *de_bh)
2107 {
2108 	struct inode *orphan_dir_inode = NULL;
2109 	struct buffer_head *orphan_dir_bh = NULL;
2110 	int status = 0;
2111 	struct ocfs2_dinode *orphan_fe;
2112 
2113 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
2114 
2115 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2116 						       ORPHAN_DIR_SYSTEM_INODE,
2117 						       osb->slot_num);
2118 	if (!orphan_dir_inode) {
2119 		status = -ENOENT;
2120 		mlog_errno(status);
2121 		goto leave;
2122 	}
2123 
2124 	status = ocfs2_read_block(osb,
2125 				  OCFS2_I(orphan_dir_inode)->ip_blkno,
2126 				  &orphan_dir_bh, OCFS2_BH_CACHED,
2127 				  orphan_dir_inode);
2128 	if (status < 0) {
2129 		mlog_errno(status);
2130 		goto leave;
2131 	}
2132 
2133 	status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
2134 				      OCFS2_JOURNAL_ACCESS_WRITE);
2135 	if (status < 0) {
2136 		mlog_errno(status);
2137 		goto leave;
2138 	}
2139 
2140 	/* we're a cluster, and nlink can change on disk from
2141 	 * underneath us... */
2142 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2143 	if (S_ISDIR(inode->i_mode))
2144 		le16_add_cpu(&orphan_fe->i_links_count, 1);
2145 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
2146 
2147 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
2148 	if (status < 0) {
2149 		mlog_errno(status);
2150 		goto leave;
2151 	}
2152 
2153 	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
2154 				   OCFS2_ORPHAN_NAMELEN, inode,
2155 				   OCFS2_I(inode)->ip_blkno,
2156 				   orphan_dir_bh, de_bh);
2157 	if (status < 0) {
2158 		mlog_errno(status);
2159 		goto leave;
2160 	}
2161 
2162 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
2163 
2164 	/* Record which orphan dir our inode now resides
2165 	 * in. delete_inode will use this to determine which orphan
2166 	 * dir to lock. */
2167 	spin_lock(&OCFS2_I(inode)->ip_lock);
2168 	OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num;
2169 	spin_unlock(&OCFS2_I(inode)->ip_lock);
2170 
2171 	mlog(0, "Inode %llu orphaned in slot %d\n",
2172 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
2173 
2174 leave:
2175 	if (orphan_dir_inode)
2176 		iput(orphan_dir_inode);
2177 
2178 	if (orphan_dir_bh)
2179 		brelse(orphan_dir_bh);
2180 
2181 	mlog_exit(status);
2182 	return status;
2183 }
2184 
2185 /* unlike orphan_add, we expect the orphan dir to already be locked here. */
2186 int ocfs2_orphan_del(struct ocfs2_super *osb,
2187 		     struct ocfs2_journal_handle *handle,
2188 		     struct inode *orphan_dir_inode,
2189 		     struct inode *inode,
2190 		     struct buffer_head *orphan_dir_bh)
2191 {
2192 	char name[OCFS2_ORPHAN_NAMELEN + 1];
2193 	struct ocfs2_dinode *orphan_fe;
2194 	int status = 0;
2195 	struct buffer_head *target_de_bh = NULL;
2196 	struct ocfs2_dir_entry *target_de = NULL;
2197 
2198 	mlog_entry_void();
2199 
2200 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2201 	if (status < 0) {
2202 		mlog_errno(status);
2203 		goto leave;
2204 	}
2205 
2206 	mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n",
2207 	     name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
2208 	     OCFS2_ORPHAN_NAMELEN);
2209 
2210 	/* find it's spot in the orphan directory */
2211 	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
2212 					orphan_dir_inode, &target_de);
2213 	if (!target_de_bh) {
2214 		status = -ENOENT;
2215 		mlog_errno(status);
2216 		goto leave;
2217 	}
2218 
2219 	/* remove it from the orphan directory */
2220 	status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de,
2221 				    target_de_bh);
2222 	if (status < 0) {
2223 		mlog_errno(status);
2224 		goto leave;
2225 	}
2226 
2227 	status = ocfs2_journal_access(handle,orphan_dir_inode,  orphan_dir_bh,
2228 				      OCFS2_JOURNAL_ACCESS_WRITE);
2229 	if (status < 0) {
2230 		mlog_errno(status);
2231 		goto leave;
2232 	}
2233 
2234 	/* do the i_nlink dance! :) */
2235 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2236 	if (S_ISDIR(inode->i_mode))
2237 		le16_add_cpu(&orphan_fe->i_links_count, -1);
2238 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
2239 
2240 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
2241 	if (status < 0) {
2242 		mlog_errno(status);
2243 		goto leave;
2244 	}
2245 
2246 leave:
2247 	if (target_de_bh)
2248 		brelse(target_de_bh);
2249 
2250 	mlog_exit(status);
2251 	return status;
2252 }
2253 
2254 struct inode_operations ocfs2_dir_iops = {
2255 	.create		= ocfs2_create,
2256 	.lookup		= ocfs2_lookup,
2257 	.link		= ocfs2_link,
2258 	.unlink		= ocfs2_unlink,
2259 	.rmdir		= ocfs2_unlink,
2260 	.symlink	= ocfs2_symlink,
2261 	.mkdir		= ocfs2_mkdir,
2262 	.mknod		= ocfs2_mknod,
2263 	.rename		= ocfs2_rename,
2264 	.setattr	= ocfs2_setattr,
2265 	.getattr	= ocfs2_getattr,
2266 };
2267