xref: /linux/fs/ocfs2/namei.c (revision 2b8232ce512105e28453f301d1510de8363bccd1)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * namei.c
5  *
6  * Create and rename file, directory, symlinks
7  *
8  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9  *
10  *  Portions of this code from linux/fs/ext3/dir.c
11  *
12  *  Copyright (C) 1992, 1993, 1994, 1995
13  *  Remy Card (card@masi.ibp.fr)
14  *  Laboratoire MASI - Institut Blaise pascal
15  *  Universite Pierre et Marie Curie (Paris VI)
16  *
17  *   from
18  *
19  *   linux/fs/minix/dir.c
20  *
21  *   Copyright (C) 1991, 1992 Linux Torvalds
22  *
23  * This program is free software; you can redistribute it and/or
24  * modify it under the terms of the GNU General Public
25  * License as published by the Free Software Foundation; either
26  * version 2 of the License, or (at your option) any later version.
27  *
28  * This program is distributed in the hope that it will be useful,
29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
31  * General Public License for more details.
32  *
33  * You should have received a copy of the GNU General Public
34  * License along with this program; if not, write to the
35  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
36  * Boston, MA 021110-1307, USA.
37  */
38 
39 #include <linux/fs.h>
40 #include <linux/types.h>
41 #include <linux/slab.h>
42 #include <linux/highmem.h>
43 
44 #define MLOG_MASK_PREFIX ML_NAMEI
45 #include <cluster/masklog.h>
46 
47 #include "ocfs2.h"
48 
49 #include "alloc.h"
50 #include "dcache.h"
51 #include "dir.h"
52 #include "dlmglue.h"
53 #include "extent_map.h"
54 #include "file.h"
55 #include "inode.h"
56 #include "journal.h"
57 #include "namei.h"
58 #include "suballoc.h"
59 #include "super.h"
60 #include "symlink.h"
61 #include "sysfile.h"
62 #include "uptodate.h"
63 #include "vote.h"
64 
65 #include "buffer_head_io.h"
66 
67 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
68 			      struct inode *dir,
69 			      struct dentry *dentry, int mode,
70 			      dev_t dev,
71 			      struct buffer_head **new_fe_bh,
72 			      struct buffer_head *parent_fe_bh,
73 			      handle_t *handle,
74 			      struct inode **ret_inode,
75 			      struct ocfs2_alloc_context *inode_ac);
76 
77 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
78 				    struct inode **ret_orphan_dir,
79 				    struct inode *inode,
80 				    char *name,
81 				    struct buffer_head **de_bh);
82 
83 static int ocfs2_orphan_add(struct ocfs2_super *osb,
84 			    handle_t *handle,
85 			    struct inode *inode,
86 			    struct ocfs2_dinode *fe,
87 			    char *name,
88 			    struct buffer_head *de_bh,
89 			    struct inode *orphan_dir_inode);
90 
91 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
92 				     handle_t *handle,
93 				     struct inode *inode,
94 				     const char *symname);
95 
96 /* An orphan dir name is an 8 byte value, printed as a hex string */
97 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
98 
99 static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
100 				   struct nameidata *nd)
101 {
102 	int status;
103 	u64 blkno;
104 	struct inode *inode = NULL;
105 	struct dentry *ret;
106 	struct ocfs2_inode_info *oi;
107 
108 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
109 		   dentry->d_name.len, dentry->d_name.name);
110 
111 	if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
112 		ret = ERR_PTR(-ENAMETOOLONG);
113 		goto bail;
114 	}
115 
116 	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
117 	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
118 
119 	status = ocfs2_meta_lock(dir, NULL, 0);
120 	if (status < 0) {
121 		if (status != -ENOENT)
122 			mlog_errno(status);
123 		ret = ERR_PTR(status);
124 		goto bail;
125 	}
126 
127 	status = ocfs2_lookup_ino_from_name(dir, dentry->d_name.name,
128 					    dentry->d_name.len, &blkno);
129 	if (status < 0)
130 		goto bail_add;
131 
132 	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
133 	if (IS_ERR(inode)) {
134 		ret = ERR_PTR(-EACCES);
135 		goto bail_unlock;
136 	}
137 
138 	oi = OCFS2_I(inode);
139 	/* Clear any orphaned state... If we were able to look up the
140 	 * inode from a directory, it certainly can't be orphaned. We
141 	 * might have the bad state from a node which intended to
142 	 * orphan this inode but crashed before it could commit the
143 	 * unlink. */
144 	spin_lock(&oi->ip_lock);
145 	oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
146 	spin_unlock(&oi->ip_lock);
147 
148 bail_add:
149 	dentry->d_op = &ocfs2_dentry_ops;
150 	ret = d_splice_alias(inode, dentry);
151 
152 	if (inode) {
153 		/*
154 		 * If d_splice_alias() finds a DCACHE_DISCONNECTED
155 		 * dentry, it will d_move() it on top of ourse. The
156 		 * return value will indicate this however, so in
157 		 * those cases, we switch them around for the locking
158 		 * code.
159 		 *
160 		 * NOTE: This dentry already has ->d_op set from
161 		 * ocfs2_get_parent() and ocfs2_get_dentry()
162 		 */
163 		if (ret)
164 			dentry = ret;
165 
166 		status = ocfs2_dentry_attach_lock(dentry, inode,
167 						  OCFS2_I(dir)->ip_blkno);
168 		if (status) {
169 			mlog_errno(status);
170 			ret = ERR_PTR(status);
171 			goto bail_unlock;
172 		}
173 	}
174 
175 bail_unlock:
176 	/* Don't drop the cluster lock until *after* the d_add --
177 	 * unlink on another node will message us to remove that
178 	 * dentry under this lock so otherwise we can race this with
179 	 * the vote thread and have a stale dentry. */
180 	ocfs2_meta_unlock(dir, 0);
181 
182 bail:
183 
184 	mlog_exit_ptr(ret);
185 
186 	return ret;
187 }
188 
189 static int ocfs2_mknod(struct inode *dir,
190 		       struct dentry *dentry,
191 		       int mode,
192 		       dev_t dev)
193 {
194 	int status = 0;
195 	struct buffer_head *parent_fe_bh = NULL;
196 	handle_t *handle = NULL;
197 	struct ocfs2_super *osb;
198 	struct ocfs2_dinode *dirfe;
199 	struct buffer_head *new_fe_bh = NULL;
200 	struct buffer_head *de_bh = NULL;
201 	struct inode *inode = NULL;
202 	struct ocfs2_alloc_context *inode_ac = NULL;
203 	struct ocfs2_alloc_context *data_ac = NULL;
204 
205 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
206 		   (unsigned long)dev, dentry->d_name.len,
207 		   dentry->d_name.name);
208 
209 	/* get our super block */
210 	osb = OCFS2_SB(dir->i_sb);
211 
212 	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
213 	if (status < 0) {
214 		if (status != -ENOENT)
215 			mlog_errno(status);
216 		return status;
217 	}
218 
219 	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
220 		status = -EMLINK;
221 		goto leave;
222 	}
223 
224 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
225 	if (!dirfe->i_links_count) {
226 		/* can't make a file in a deleted directory. */
227 		status = -ENOENT;
228 		goto leave;
229 	}
230 
231 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
232 					   dentry->d_name.len);
233 	if (status)
234 		goto leave;
235 
236 	/* get a spot inside the dir. */
237 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
238 					      dentry->d_name.name,
239 					      dentry->d_name.len, &de_bh);
240 	if (status < 0) {
241 		mlog_errno(status);
242 		goto leave;
243 	}
244 
245 	/* reserve an inode spot */
246 	status = ocfs2_reserve_new_inode(osb, &inode_ac);
247 	if (status < 0) {
248 		if (status != -ENOSPC)
249 			mlog_errno(status);
250 		goto leave;
251 	}
252 
253 	/* Reserve a cluster if creating an extent based directory. */
254 	if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
255 		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
256 		if (status < 0) {
257 			if (status != -ENOSPC)
258 				mlog_errno(status);
259 			goto leave;
260 		}
261 	}
262 
263 	handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS);
264 	if (IS_ERR(handle)) {
265 		status = PTR_ERR(handle);
266 		handle = NULL;
267 		mlog_errno(status);
268 		goto leave;
269 	}
270 
271 	/* do the real work now. */
272 	status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
273 				    &new_fe_bh, parent_fe_bh, handle,
274 				    &inode, inode_ac);
275 	if (status < 0) {
276 		mlog_errno(status);
277 		goto leave;
278 	}
279 
280 	if (S_ISDIR(mode)) {
281 		status = ocfs2_fill_new_dir(osb, handle, dir, inode,
282 					    new_fe_bh, data_ac);
283 		if (status < 0) {
284 			mlog_errno(status);
285 			goto leave;
286 		}
287 
288 		status = ocfs2_journal_access(handle, dir, parent_fe_bh,
289 					      OCFS2_JOURNAL_ACCESS_WRITE);
290 		if (status < 0) {
291 			mlog_errno(status);
292 			goto leave;
293 		}
294 		le16_add_cpu(&dirfe->i_links_count, 1);
295 		status = ocfs2_journal_dirty(handle, parent_fe_bh);
296 		if (status < 0) {
297 			mlog_errno(status);
298 			goto leave;
299 		}
300 		inc_nlink(dir);
301 	}
302 
303 	status = ocfs2_add_entry(handle, dentry, inode,
304 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
305 				 de_bh);
306 	if (status < 0) {
307 		mlog_errno(status);
308 		goto leave;
309 	}
310 
311 	status = ocfs2_dentry_attach_lock(dentry, inode,
312 					  OCFS2_I(dir)->ip_blkno);
313 	if (status) {
314 		mlog_errno(status);
315 		goto leave;
316 	}
317 
318 	insert_inode_hash(inode);
319 	dentry->d_op = &ocfs2_dentry_ops;
320 	d_instantiate(dentry, inode);
321 	status = 0;
322 leave:
323 	if (handle)
324 		ocfs2_commit_trans(osb, handle);
325 
326 	ocfs2_meta_unlock(dir, 1);
327 
328 	if (status == -ENOSPC)
329 		mlog(0, "Disk is full\n");
330 
331 	if (new_fe_bh)
332 		brelse(new_fe_bh);
333 
334 	if (de_bh)
335 		brelse(de_bh);
336 
337 	if (parent_fe_bh)
338 		brelse(parent_fe_bh);
339 
340 	if ((status < 0) && inode)
341 		iput(inode);
342 
343 	if (inode_ac)
344 		ocfs2_free_alloc_context(inode_ac);
345 
346 	if (data_ac)
347 		ocfs2_free_alloc_context(data_ac);
348 
349 	mlog_exit(status);
350 
351 	return status;
352 }
353 
354 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
355 			      struct inode *dir,
356 			      struct dentry *dentry, int mode,
357 			      dev_t dev,
358 			      struct buffer_head **new_fe_bh,
359 			      struct buffer_head *parent_fe_bh,
360 			      handle_t *handle,
361 			      struct inode **ret_inode,
362 			      struct ocfs2_alloc_context *inode_ac)
363 {
364 	int status = 0;
365 	struct ocfs2_dinode *fe = NULL;
366 	struct ocfs2_extent_list *fel;
367 	u64 fe_blkno = 0;
368 	u16 suballoc_bit;
369 	struct inode *inode = NULL;
370 
371 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
372 		   (unsigned long)dev, dentry->d_name.len,
373 		   dentry->d_name.name);
374 
375 	*new_fe_bh = NULL;
376 	*ret_inode = NULL;
377 
378 	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
379 				       &fe_blkno);
380 	if (status < 0) {
381 		mlog_errno(status);
382 		goto leave;
383 	}
384 
385 	inode = new_inode(dir->i_sb);
386 	if (IS_ERR(inode)) {
387 		status = PTR_ERR(inode);
388 		mlog(ML_ERROR, "new_inode failed!\n");
389 		goto leave;
390 	}
391 
392 	/* populate as many fields early on as possible - many of
393 	 * these are used by the support functions here and in
394 	 * callers. */
395 	inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
396 	OCFS2_I(inode)->ip_blkno = fe_blkno;
397 	if (S_ISDIR(mode))
398 		inode->i_nlink = 2;
399 	else
400 		inode->i_nlink = 1;
401 	inode->i_mode = mode;
402 	spin_lock(&osb->osb_lock);
403 	inode->i_generation = osb->s_next_generation++;
404 	spin_unlock(&osb->osb_lock);
405 
406 	*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
407 	if (!*new_fe_bh) {
408 		status = -EIO;
409 		mlog_errno(status);
410 		goto leave;
411 	}
412 	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
413 
414 	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
415 				      OCFS2_JOURNAL_ACCESS_CREATE);
416 	if (status < 0) {
417 		mlog_errno(status);
418 		goto leave;
419 	}
420 
421 	fe = (struct ocfs2_dinode *) (*new_fe_bh)->b_data;
422 	memset(fe, 0, osb->sb->s_blocksize);
423 
424 	fe->i_generation = cpu_to_le32(inode->i_generation);
425 	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
426 	fe->i_blkno = cpu_to_le64(fe_blkno);
427 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
428 	fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
429 	fe->i_uid = cpu_to_le32(current->fsuid);
430 	if (dir->i_mode & S_ISGID) {
431 		fe->i_gid = cpu_to_le32(dir->i_gid);
432 		if (S_ISDIR(mode))
433 			mode |= S_ISGID;
434 	} else
435 		fe->i_gid = cpu_to_le32(current->fsgid);
436 	fe->i_mode = cpu_to_le16(mode);
437 	if (S_ISCHR(mode) || S_ISBLK(mode))
438 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
439 
440 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
441 
442 	fe->i_last_eb_blk = 0;
443 	strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
444 	le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL);
445 	fe->i_atime = fe->i_ctime = fe->i_mtime =
446 		cpu_to_le64(CURRENT_TIME.tv_sec);
447 	fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
448 		cpu_to_le32(CURRENT_TIME.tv_nsec);
449 	fe->i_dtime = 0;
450 
451 	/*
452 	 * If supported, directories start with inline data.
453 	 */
454 	if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
455 		u16 feat = le16_to_cpu(fe->i_dyn_features);
456 
457 		fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
458 
459 		fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
460 	} else {
461 		fel = &fe->id2.i_list;
462 		fel->l_tree_depth = 0;
463 		fel->l_next_free_rec = 0;
464 		fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
465 	}
466 
467 	status = ocfs2_journal_dirty(handle, *new_fe_bh);
468 	if (status < 0) {
469 		mlog_errno(status);
470 		goto leave;
471 	}
472 
473 	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
474 		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
475 		     "i_blkno=%llu, i_ino=%lu\n",
476 		     (unsigned long long)(*new_fe_bh)->b_blocknr,
477 		     (unsigned long long)le64_to_cpu(fe->i_blkno),
478 		     inode->i_ino);
479 		BUG();
480 	}
481 
482 	ocfs2_inode_set_new(osb, inode);
483 	if (!ocfs2_mount_local(osb)) {
484 		status = ocfs2_create_new_inode_locks(inode);
485 		if (status < 0)
486 			mlog_errno(status);
487 	}
488 
489 	status = 0; /* error in ocfs2_create_new_inode_locks is not
490 		     * critical */
491 
492 	*ret_inode = inode;
493 leave:
494 	if (status < 0) {
495 		if (*new_fe_bh) {
496 			brelse(*new_fe_bh);
497 			*new_fe_bh = NULL;
498 		}
499 		if (inode)
500 			iput(inode);
501 	}
502 
503 	mlog_exit(status);
504 	return status;
505 }
506 
507 static int ocfs2_mkdir(struct inode *dir,
508 		       struct dentry *dentry,
509 		       int mode)
510 {
511 	int ret;
512 
513 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
514 		   dentry->d_name.len, dentry->d_name.name);
515 	ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
516 	mlog_exit(ret);
517 
518 	return ret;
519 }
520 
521 static int ocfs2_create(struct inode *dir,
522 			struct dentry *dentry,
523 			int mode,
524 			struct nameidata *nd)
525 {
526 	int ret;
527 
528 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
529 		   dentry->d_name.len, dentry->d_name.name);
530 	ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
531 	mlog_exit(ret);
532 
533 	return ret;
534 }
535 
536 static int ocfs2_link(struct dentry *old_dentry,
537 		      struct inode *dir,
538 		      struct dentry *dentry)
539 {
540 	handle_t *handle;
541 	struct inode *inode = old_dentry->d_inode;
542 	int err;
543 	struct buffer_head *fe_bh = NULL;
544 	struct buffer_head *parent_fe_bh = NULL;
545 	struct buffer_head *de_bh = NULL;
546 	struct ocfs2_dinode *fe = NULL;
547 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
548 
549 	mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
550 		   old_dentry->d_name.len, old_dentry->d_name.name,
551 		   dentry->d_name.len, dentry->d_name.name);
552 
553 	if (S_ISDIR(inode->i_mode))
554 		return -EPERM;
555 
556 	err = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
557 	if (err < 0) {
558 		if (err != -ENOENT)
559 			mlog_errno(err);
560 		return err;
561 	}
562 
563 	if (!dir->i_nlink) {
564 		err = -ENOENT;
565 		goto out;
566 	}
567 
568 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
569 					dentry->d_name.len);
570 	if (err)
571 		goto out;
572 
573 	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
574 					   dentry->d_name.name,
575 					   dentry->d_name.len, &de_bh);
576 	if (err < 0) {
577 		mlog_errno(err);
578 		goto out;
579 	}
580 
581 	err = ocfs2_meta_lock(inode, &fe_bh, 1);
582 	if (err < 0) {
583 		if (err != -ENOENT)
584 			mlog_errno(err);
585 		goto out;
586 	}
587 
588 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
589 	if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
590 		err = -EMLINK;
591 		goto out_unlock_inode;
592 	}
593 
594 	handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS);
595 	if (IS_ERR(handle)) {
596 		err = PTR_ERR(handle);
597 		handle = NULL;
598 		mlog_errno(err);
599 		goto out_unlock_inode;
600 	}
601 
602 	err = ocfs2_journal_access(handle, inode, fe_bh,
603 				   OCFS2_JOURNAL_ACCESS_WRITE);
604 	if (err < 0) {
605 		mlog_errno(err);
606 		goto out_commit;
607 	}
608 
609 	inc_nlink(inode);
610 	inode->i_ctime = CURRENT_TIME;
611 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
612 	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
613 	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
614 
615 	err = ocfs2_journal_dirty(handle, fe_bh);
616 	if (err < 0) {
617 		le16_add_cpu(&fe->i_links_count, -1);
618 		drop_nlink(inode);
619 		mlog_errno(err);
620 		goto out_commit;
621 	}
622 
623 	err = ocfs2_add_entry(handle, dentry, inode,
624 			      OCFS2_I(inode)->ip_blkno,
625 			      parent_fe_bh, de_bh);
626 	if (err) {
627 		le16_add_cpu(&fe->i_links_count, -1);
628 		drop_nlink(inode);
629 		mlog_errno(err);
630 		goto out_commit;
631 	}
632 
633 	err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
634 	if (err) {
635 		mlog_errno(err);
636 		goto out_commit;
637 	}
638 
639 	atomic_inc(&inode->i_count);
640 	dentry->d_op = &ocfs2_dentry_ops;
641 	d_instantiate(dentry, inode);
642 
643 out_commit:
644 	ocfs2_commit_trans(osb, handle);
645 out_unlock_inode:
646 	ocfs2_meta_unlock(inode, 1);
647 
648 out:
649 	ocfs2_meta_unlock(dir, 1);
650 
651 	if (de_bh)
652 		brelse(de_bh);
653 	if (fe_bh)
654 		brelse(fe_bh);
655 	if (parent_fe_bh)
656 		brelse(parent_fe_bh);
657 
658 	mlog_exit(err);
659 
660 	return err;
661 }
662 
/*
 * Acquire the dentry lock on @dentry exclusively and release it again
 * right away.  Taking it is what forces other nodes to drop the dentry.
 *
 * Returns 0 on success, or the error from ocfs2_dentry_lock() (which is
 * also logged).
 */
static int ocfs2_remote_dentry_delete(struct dentry *dentry)
{
	int status = ocfs2_dentry_lock(dentry, 1);

	if (status) {
		mlog_errno(status);
		return status;
	}

	ocfs2_dentry_unlock(dentry, 1);
	return status;
}
679 
680 static inline int inode_is_unlinkable(struct inode *inode)
681 {
682 	if (S_ISDIR(inode->i_mode)) {
683 		if (inode->i_nlink == 2)
684 			return 1;
685 		return 0;
686 	}
687 
688 	if (inode->i_nlink == 1)
689 		return 1;
690 	return 0;
691 }
692 
693 static int ocfs2_unlink(struct inode *dir,
694 			struct dentry *dentry)
695 {
696 	int status;
697 	int child_locked = 0;
698 	struct inode *inode = dentry->d_inode;
699 	struct inode *orphan_dir = NULL;
700 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
701 	u64 blkno;
702 	struct ocfs2_dinode *fe = NULL;
703 	struct buffer_head *fe_bh = NULL;
704 	struct buffer_head *parent_node_bh = NULL;
705 	handle_t *handle = NULL;
706 	struct ocfs2_dir_entry *dirent = NULL;
707 	struct buffer_head *dirent_bh = NULL;
708 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
709 	struct buffer_head *orphan_entry_bh = NULL;
710 
711 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
712 		   dentry->d_name.len, dentry->d_name.name);
713 
714 	BUG_ON(dentry->d_parent->d_inode != dir);
715 
716 	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
717 
718 	if (inode == osb->root_inode) {
719 		mlog(0, "Cannot delete the root directory\n");
720 		return -EPERM;
721 	}
722 
723 	status = ocfs2_meta_lock(dir, &parent_node_bh, 1);
724 	if (status < 0) {
725 		if (status != -ENOENT)
726 			mlog_errno(status);
727 		return status;
728 	}
729 
730 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
731 					  dentry->d_name.len, &blkno,
732 					  dir, &dirent_bh, &dirent);
733 	if (status < 0) {
734 		if (status != -ENOENT)
735 			mlog_errno(status);
736 		goto leave;
737 	}
738 
739 	if (OCFS2_I(inode)->ip_blkno != blkno) {
740 		status = -ENOENT;
741 
742 		mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n",
743 		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
744 		     (unsigned long long)blkno, OCFS2_I(inode)->ip_flags);
745 		goto leave;
746 	}
747 
748 	status = ocfs2_meta_lock(inode, &fe_bh, 1);
749 	if (status < 0) {
750 		if (status != -ENOENT)
751 			mlog_errno(status);
752 		goto leave;
753 	}
754 	child_locked = 1;
755 
756 	if (S_ISDIR(inode->i_mode)) {
757 	       	if (!ocfs2_empty_dir(inode)) {
758 			status = -ENOTEMPTY;
759 			goto leave;
760 		} else if (inode->i_nlink != 2) {
761 			status = -ENOTEMPTY;
762 			goto leave;
763 		}
764 	}
765 
766 	status = ocfs2_remote_dentry_delete(dentry);
767 	if (status < 0) {
768 		/* This vote should succeed under all normal
769 		 * circumstances. */
770 		mlog_errno(status);
771 		goto leave;
772 	}
773 
774 	if (inode_is_unlinkable(inode)) {
775 		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode,
776 						  orphan_name,
777 						  &orphan_entry_bh);
778 		if (status < 0) {
779 			mlog_errno(status);
780 			goto leave;
781 		}
782 	}
783 
784 	handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS);
785 	if (IS_ERR(handle)) {
786 		status = PTR_ERR(handle);
787 		handle = NULL;
788 		mlog_errno(status);
789 		goto leave;
790 	}
791 
792 	status = ocfs2_journal_access(handle, inode, fe_bh,
793 				      OCFS2_JOURNAL_ACCESS_WRITE);
794 	if (status < 0) {
795 		mlog_errno(status);
796 		goto leave;
797 	}
798 
799 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
800 
801 	if (inode_is_unlinkable(inode)) {
802 		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
803 					  orphan_entry_bh, orphan_dir);
804 		if (status < 0) {
805 			mlog_errno(status);
806 			goto leave;
807 		}
808 	}
809 
810 	/* delete the name from the parent dir */
811 	status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh);
812 	if (status < 0) {
813 		mlog_errno(status);
814 		goto leave;
815 	}
816 
817 	if (S_ISDIR(inode->i_mode))
818 		drop_nlink(inode);
819 	drop_nlink(inode);
820 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
821 
822 	status = ocfs2_journal_dirty(handle, fe_bh);
823 	if (status < 0) {
824 		mlog_errno(status);
825 		goto leave;
826 	}
827 
828 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
829 	if (S_ISDIR(inode->i_mode))
830 		drop_nlink(dir);
831 
832 	status = ocfs2_mark_inode_dirty(handle, dir, parent_node_bh);
833 	if (status < 0) {
834 		mlog_errno(status);
835 		if (S_ISDIR(inode->i_mode))
836 			inc_nlink(dir);
837 	}
838 
839 leave:
840 	if (handle)
841 		ocfs2_commit_trans(osb, handle);
842 
843 	if (child_locked)
844 		ocfs2_meta_unlock(inode, 1);
845 
846 	ocfs2_meta_unlock(dir, 1);
847 
848 	if (orphan_dir) {
849 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
850 		ocfs2_meta_unlock(orphan_dir, 1);
851 		mutex_unlock(&orphan_dir->i_mutex);
852 		iput(orphan_dir);
853 	}
854 
855 	if (fe_bh)
856 		brelse(fe_bh);
857 
858 	if (dirent_bh)
859 		brelse(dirent_bh);
860 
861 	if (parent_node_bh)
862 		brelse(parent_node_bh);
863 
864 	if (orphan_entry_bh)
865 		brelse(orphan_entry_bh);
866 
867 	mlog_exit(status);
868 
869 	return status;
870 }
871 
872 /*
873  * The only place this should be used is rename!
874  * if they have the same id, then the 1st one is the only one locked.
875  */
876 static int ocfs2_double_lock(struct ocfs2_super *osb,
877 			     struct buffer_head **bh1,
878 			     struct inode *inode1,
879 			     struct buffer_head **bh2,
880 			     struct inode *inode2)
881 {
882 	int status;
883 	struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
884 	struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
885 	struct buffer_head **tmpbh;
886 	struct inode *tmpinode;
887 
888 	mlog_entry("(inode1 = %llu, inode2 = %llu)\n",
889 		   (unsigned long long)oi1->ip_blkno,
890 		   (unsigned long long)oi2->ip_blkno);
891 
892 	if (*bh1)
893 		*bh1 = NULL;
894 	if (*bh2)
895 		*bh2 = NULL;
896 
897 	/* we always want to lock the one with the lower lockid first. */
898 	if (oi1->ip_blkno != oi2->ip_blkno) {
899 		if (oi1->ip_blkno < oi2->ip_blkno) {
900 			/* switch id1 and id2 around */
901 			mlog(0, "switching them around...\n");
902 			tmpbh = bh2;
903 			bh2 = bh1;
904 			bh1 = tmpbh;
905 
906 			tmpinode = inode2;
907 			inode2 = inode1;
908 			inode1 = tmpinode;
909 		}
910 		/* lock id2 */
911 		status = ocfs2_meta_lock(inode2, bh2, 1);
912 		if (status < 0) {
913 			if (status != -ENOENT)
914 				mlog_errno(status);
915 			goto bail;
916 		}
917 	}
918 
919 	/* lock id1 */
920 	status = ocfs2_meta_lock(inode1, bh1, 1);
921 	if (status < 0) {
922 		/*
923 		 * An error return must mean that no cluster locks
924 		 * were held on function exit.
925 		 */
926 		if (oi1->ip_blkno != oi2->ip_blkno)
927 			ocfs2_meta_unlock(inode2, 1);
928 
929 		if (status != -ENOENT)
930 			mlog_errno(status);
931 	}
932 
933 bail:
934 	mlog_exit(status);
935 	return status;
936 }
937 
/*
 * Release the exclusive metadata locks on @inode1 and @inode2.  When both
 * arguments are the same inode only one unlock is issued.
 */
static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
{
	ocfs2_meta_unlock(inode1, 1);

	if (inode2 == inode1)
		return;

	ocfs2_meta_unlock(inode2, 1);
}
945 
946 static int ocfs2_rename(struct inode *old_dir,
947 			struct dentry *old_dentry,
948 			struct inode *new_dir,
949 			struct dentry *new_dentry)
950 {
951 	int status = 0, rename_lock = 0, parents_locked = 0;
952 	int old_child_locked = 0, new_child_locked = 0;
953 	struct inode *old_inode = old_dentry->d_inode;
954 	struct inode *new_inode = new_dentry->d_inode;
955 	struct inode *orphan_dir = NULL;
956 	struct ocfs2_dinode *newfe = NULL;
957 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
958 	struct buffer_head *orphan_entry_bh = NULL;
959 	struct buffer_head *newfe_bh = NULL;
960 	struct buffer_head *old_inode_bh = NULL;
961 	struct buffer_head *insert_entry_bh = NULL;
962 	struct ocfs2_super *osb = NULL;
963 	u64 newfe_blkno, old_de_ino;
964 	handle_t *handle = NULL;
965 	struct buffer_head *old_dir_bh = NULL;
966 	struct buffer_head *new_dir_bh = NULL;
967 	struct ocfs2_dir_entry *old_inode_dot_dot_de = NULL, *old_de = NULL,
968 		*new_de = NULL;
969 	struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
970 	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
971 						    // this is the 1st dirent bh
972 	nlink_t old_dir_nlink = old_dir->i_nlink;
973 	struct ocfs2_dinode *old_di;
974 
975 	/* At some point it might be nice to break this function up a
976 	 * bit. */
977 
978 	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.*s' to='%.*s')\n",
979 		   old_dir, old_dentry, new_dir, new_dentry,
980 		   old_dentry->d_name.len, old_dentry->d_name.name,
981 		   new_dentry->d_name.len, new_dentry->d_name.name);
982 
983 	osb = OCFS2_SB(old_dir->i_sb);
984 
985 	if (new_inode) {
986 		if (!igrab(new_inode))
987 			BUG();
988 	}
989 
990 	/* Assume a directory hierarchy thusly:
991 	 * a/b/c
992 	 * a/d
993 	 * a,b,c, and d are all directories.
994 	 *
995 	 * from cwd of 'a' on both nodes:
996 	 * node1: mv b/c d
997 	 * node2: mv d   b/c
998 	 *
999 	 * And that's why, just like the VFS, we need a file system
1000 	 * rename lock. */
1001 	if (old_dentry != new_dentry) {
1002 		status = ocfs2_rename_lock(osb);
1003 		if (status < 0) {
1004 			mlog_errno(status);
1005 			goto bail;
1006 		}
1007 		rename_lock = 1;
1008 	}
1009 
1010 	/* if old and new are the same, this'll just do one lock. */
1011 	status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
1012 				   &new_dir_bh, new_dir);
1013 	if (status < 0) {
1014 		mlog_errno(status);
1015 		goto bail;
1016 	}
1017 	parents_locked = 1;
1018 
1019 	/* make sure both dirs have bhs
1020 	 * get an extra ref on old_dir_bh if old==new */
1021 	if (!new_dir_bh) {
1022 		if (old_dir_bh) {
1023 			new_dir_bh = old_dir_bh;
1024 			get_bh(new_dir_bh);
1025 		} else {
1026 			mlog(ML_ERROR, "no old_dir_bh!\n");
1027 			status = -EIO;
1028 			goto bail;
1029 		}
1030 	}
1031 
1032 	/*
1033 	 * Aside from allowing a meta data update, the locking here
1034 	 * also ensures that the vote thread on other nodes won't have
1035 	 * to concurrently downconvert the inode and the dentry locks.
1036 	 */
1037 	status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1);
1038 	if (status < 0) {
1039 		if (status != -ENOENT)
1040 			mlog_errno(status);
1041 		goto bail;
1042 	}
1043 	old_child_locked = 1;
1044 
1045 	status = ocfs2_remote_dentry_delete(old_dentry);
1046 	if (status < 0) {
1047 		mlog_errno(status);
1048 		goto bail;
1049 	}
1050 
1051 	if (S_ISDIR(old_inode->i_mode)) {
1052 		u64 old_inode_parent;
1053 
1054 		status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
1055 						  old_inode, &old_inode_de_bh,
1056 						  &old_inode_dot_dot_de);
1057 		if (status) {
1058 			status = -EIO;
1059 			goto bail;
1060 		}
1061 
1062 		if (old_inode_parent != OCFS2_I(old_dir)->ip_blkno) {
1063 			status = -EIO;
1064 			goto bail;
1065 		}
1066 
1067 		if (!new_inode && new_dir != old_dir &&
1068 		    new_dir->i_nlink >= OCFS2_LINK_MAX) {
1069 			status = -EMLINK;
1070 			goto bail;
1071 		}
1072 	}
1073 
1074 	status = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
1075 					    old_dentry->d_name.len,
1076 					    &old_de_ino);
1077 	if (status) {
1078 		status = -ENOENT;
1079 		goto bail;
1080 	}
1081 
1082 	/*
1083 	 *  Check for inode number is _not_ due to possible IO errors.
1084 	 *  We might rmdir the source, keep it as pwd of some process
1085 	 *  and merrily kill the link to whatever was created under the
1086 	 *  same name. Goodbye sticky bit ;-<
1087 	 */
1088 	if (old_de_ino != OCFS2_I(old_inode)->ip_blkno) {
1089 		status = -ENOENT;
1090 		goto bail;
1091 	}
1092 
1093 	/* check if the target already exists (in which case we need
1094 	 * to delete it */
1095 	status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
1096 					  new_dentry->d_name.len,
1097 					  &newfe_blkno, new_dir, &new_de_bh,
1098 					  &new_de);
1099 	/* The only error we allow here is -ENOENT because the new
1100 	 * file not existing is perfectly valid. */
1101 	if ((status < 0) && (status != -ENOENT)) {
1102 		/* If we cannot find the file specified we should just */
1103 		/* return the error... */
1104 		mlog_errno(status);
1105 		goto bail;
1106 	}
1107 
1108 	if (!new_de && new_inode)
1109 		mlog(ML_ERROR, "inode %lu does not exist in it's parent "
1110 		     "directory!", new_inode->i_ino);
1111 
1112 	/* In case we need to overwrite an existing file, we blow it
1113 	 * away first */
1114 	if (new_de) {
1115 		/* VFS didn't think there existed an inode here, but
1116 		 * someone else in the cluster must have raced our
1117 		 * rename to create one. Today we error cleanly, in
1118 		 * the future we should consider calling iget to build
1119 		 * a new struct inode for this entry. */
1120 		if (!new_inode) {
1121 			status = -EACCES;
1122 
1123 			mlog(0, "We found an inode for name %.*s but VFS "
1124 			     "didn't give us one.\n", new_dentry->d_name.len,
1125 			     new_dentry->d_name.name);
1126 			goto bail;
1127 		}
1128 
1129 		if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
1130 			status = -EACCES;
1131 
1132 			mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n",
1133 			     (unsigned long long)OCFS2_I(new_inode)->ip_blkno,
1134 			     (unsigned long long)newfe_blkno,
1135 			     OCFS2_I(new_inode)->ip_flags);
1136 			goto bail;
1137 		}
1138 
1139 		status = ocfs2_meta_lock(new_inode, &newfe_bh, 1);
1140 		if (status < 0) {
1141 			if (status != -ENOENT)
1142 				mlog_errno(status);
1143 			goto bail;
1144 		}
1145 		new_child_locked = 1;
1146 
1147 		status = ocfs2_remote_dentry_delete(new_dentry);
1148 		if (status < 0) {
1149 			mlog_errno(status);
1150 			goto bail;
1151 		}
1152 
1153 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
1154 
1155 		mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
1156 		     "newfebh=%p bhblocknr=%llu\n", new_de,
1157 		     (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
1158 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
1159 
1160 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
1161 			status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
1162 							  new_inode,
1163 							  orphan_name,
1164 							  &orphan_entry_bh);
1165 			if (status < 0) {
1166 				mlog_errno(status);
1167 				goto bail;
1168 			}
1169 		}
1170 	} else {
1171 		BUG_ON(new_dentry->d_parent->d_inode != new_dir);
1172 
1173 		status = ocfs2_check_dir_for_entry(new_dir,
1174 						   new_dentry->d_name.name,
1175 						   new_dentry->d_name.len);
1176 		if (status)
1177 			goto bail;
1178 
1179 		status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
1180 						      new_dentry->d_name.name,
1181 						      new_dentry->d_name.len,
1182 						      &insert_entry_bh);
1183 		if (status < 0) {
1184 			mlog_errno(status);
1185 			goto bail;
1186 		}
1187 	}
1188 
1189 	handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS);
1190 	if (IS_ERR(handle)) {
1191 		status = PTR_ERR(handle);
1192 		handle = NULL;
1193 		mlog_errno(status);
1194 		goto bail;
1195 	}
1196 
1197 	if (new_de) {
1198 		if (S_ISDIR(new_inode->i_mode)) {
1199 			if (!ocfs2_empty_dir(new_inode) ||
1200 			    new_inode->i_nlink != 2) {
1201 				status = -ENOTEMPTY;
1202 				goto bail;
1203 			}
1204 		}
1205 		status = ocfs2_journal_access(handle, new_inode, newfe_bh,
1206 					      OCFS2_JOURNAL_ACCESS_WRITE);
1207 		if (status < 0) {
1208 			mlog_errno(status);
1209 			goto bail;
1210 		}
1211 
1212 		if (S_ISDIR(new_inode->i_mode) ||
1213 		    (newfe->i_links_count == cpu_to_le16(1))){
1214 			status = ocfs2_orphan_add(osb, handle, new_inode,
1215 						  newfe, orphan_name,
1216 						  orphan_entry_bh, orphan_dir);
1217 			if (status < 0) {
1218 				mlog_errno(status);
1219 				goto bail;
1220 			}
1221 		}
1222 
1223 		/* change the dirent to point to the correct inode */
1224 		status = ocfs2_update_entry(new_dir, handle, new_de_bh,
1225 					    new_de, old_inode);
1226 		if (status < 0) {
1227 			mlog_errno(status);
1228 			goto bail;
1229 		}
1230 		new_dir->i_version++;
1231 
1232 		if (S_ISDIR(new_inode->i_mode))
1233 			newfe->i_links_count = 0;
1234 		else
1235 			le16_add_cpu(&newfe->i_links_count, -1);
1236 
1237 		status = ocfs2_journal_dirty(handle, newfe_bh);
1238 		if (status < 0) {
1239 			mlog_errno(status);
1240 			goto bail;
1241 		}
1242 	} else {
1243 		/* if the name was not found in new_dir, add it now */
1244 		status = ocfs2_add_entry(handle, new_dentry, old_inode,
1245 					 OCFS2_I(old_inode)->ip_blkno,
1246 					 new_dir_bh, insert_entry_bh);
1247 	}
1248 
1249 	old_inode->i_ctime = CURRENT_TIME;
1250 	mark_inode_dirty(old_inode);
1251 
1252 	status = ocfs2_journal_access(handle, old_inode, old_inode_bh,
1253 				      OCFS2_JOURNAL_ACCESS_WRITE);
1254 	if (status >= 0) {
1255 		old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
1256 
1257 		old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
1258 		old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
1259 
1260 		status = ocfs2_journal_dirty(handle, old_inode_bh);
1261 		if (status < 0)
1262 			mlog_errno(status);
1263 	} else
1264 		mlog_errno(status);
1265 
1266 	/*
1267 	 * Now that the name has been added to new_dir, remove the old name.
1268 	 *
1269 	 * We don't keep any directory entry context around until now
1270 	 * because the insert might have changed the type of directory
1271 	 * we're dealing with.
1272 	 */
1273 	old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
1274 				     old_dentry->d_name.len,
1275 				     old_dir, &old_de);
1276 	if (!old_de_bh) {
1277 		status = -EIO;
1278 		goto bail;
1279 	}
1280 
1281 	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
1282 	if (status < 0) {
1283 		mlog_errno(status);
1284 		goto bail;
1285 	}
1286 
1287 	if (new_inode) {
1288 		new_inode->i_nlink--;
1289 		new_inode->i_ctime = CURRENT_TIME;
1290 	}
1291 	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1292 	if (old_inode_de_bh) {
1293 		status = ocfs2_update_entry(old_inode, handle, old_inode_de_bh,
1294 					    old_inode_dot_dot_de, new_dir);
1295 		old_dir->i_nlink--;
1296 		if (new_inode) {
1297 			new_inode->i_nlink--;
1298 		} else {
1299 			inc_nlink(new_dir);
1300 			mark_inode_dirty(new_dir);
1301 		}
1302 	}
1303 	mark_inode_dirty(old_dir);
1304 	ocfs2_mark_inode_dirty(handle, old_dir, old_dir_bh);
1305 	if (new_inode) {
1306 		mark_inode_dirty(new_inode);
1307 		ocfs2_mark_inode_dirty(handle, new_inode, newfe_bh);
1308 	}
1309 
1310 	if (old_dir != new_dir) {
1311 		/* Keep the same times on both directories.*/
1312 		new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime;
1313 
1314 		/*
1315 		 * This will also pick up the i_nlink change from the
1316 		 * block above.
1317 		 */
1318 		ocfs2_mark_inode_dirty(handle, new_dir, new_dir_bh);
1319 	}
1320 
1321 	if (old_dir_nlink != old_dir->i_nlink) {
1322 		if (!old_dir_bh) {
1323 			mlog(ML_ERROR, "need to change nlink for old dir "
1324 			     "%llu from %d to %d but bh is NULL!\n",
1325 			     (unsigned long long)OCFS2_I(old_dir)->ip_blkno,
1326 			     (int)old_dir_nlink, old_dir->i_nlink);
1327 		} else {
1328 			struct ocfs2_dinode *fe;
1329 			status = ocfs2_journal_access(handle, old_dir,
1330 						      old_dir_bh,
1331 						      OCFS2_JOURNAL_ACCESS_WRITE);
1332 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1333 			fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
1334 			status = ocfs2_journal_dirty(handle, old_dir_bh);
1335 		}
1336 	}
1337 
1338 	ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
1339 	status = 0;
1340 bail:
1341 	if (rename_lock)
1342 		ocfs2_rename_unlock(osb);
1343 
1344 	if (handle)
1345 		ocfs2_commit_trans(osb, handle);
1346 
1347 	if (parents_locked)
1348 		ocfs2_double_unlock(old_dir, new_dir);
1349 
1350 	if (old_child_locked)
1351 		ocfs2_meta_unlock(old_inode, 1);
1352 
1353 	if (new_child_locked)
1354 		ocfs2_meta_unlock(new_inode, 1);
1355 
1356 	if (orphan_dir) {
1357 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
1358 		ocfs2_meta_unlock(orphan_dir, 1);
1359 		mutex_unlock(&orphan_dir->i_mutex);
1360 		iput(orphan_dir);
1361 	}
1362 
1363 	if (new_inode)
1364 		sync_mapping_buffers(old_inode->i_mapping);
1365 
1366 	if (new_inode)
1367 		iput(new_inode);
1368 	if (newfe_bh)
1369 		brelse(newfe_bh);
1370 	if (old_inode_bh)
1371 		brelse(old_inode_bh);
1372 	if (old_dir_bh)
1373 		brelse(old_dir_bh);
1374 	if (new_dir_bh)
1375 		brelse(new_dir_bh);
1376 	if (new_de_bh)
1377 		brelse(new_de_bh);
1378 	if (old_de_bh)
1379 		brelse(old_de_bh);
1380 	if (old_inode_de_bh)
1381 		brelse(old_inode_de_bh);
1382 	if (orphan_entry_bh)
1383 		brelse(orphan_entry_bh);
1384 	if (insert_entry_bh)
1385 		brelse(insert_entry_bh);
1386 
1387 	mlog_exit(status);
1388 
1389 	return status;
1390 }
1391 
1392 /*
1393  * we expect i_size = strlen(symname). Copy symname into the file
1394  * data, including the null terminator.
1395  */
1396 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1397 				     handle_t *handle,
1398 				     struct inode *inode,
1399 				     const char *symname)
1400 {
1401 	struct buffer_head **bhs = NULL;
1402 	const char *c;
1403 	struct super_block *sb = osb->sb;
1404 	u64 p_blkno, p_blocks;
1405 	int virtual, blocks, status, i, bytes_left;
1406 
1407 	bytes_left = i_size_read(inode) + 1;
1408 	/* we can't trust i_blocks because we're actually going to
1409 	 * write i_size + 1 bytes. */
1410 	blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
1411 
1412 	mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n",
1413 			(unsigned long long)inode->i_blocks,
1414 			i_size_read(inode), blocks);
1415 
1416 	/* Sanity check -- make sure we're going to fit. */
1417 	if (bytes_left >
1418 	    ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) {
1419 		status = -EIO;
1420 		mlog_errno(status);
1421 		goto bail;
1422 	}
1423 
1424 	bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL);
1425 	if (!bhs) {
1426 		status = -ENOMEM;
1427 		mlog_errno(status);
1428 		goto bail;
1429 	}
1430 
1431 	status = ocfs2_extent_map_get_blocks(inode, 0, &p_blkno, &p_blocks,
1432 					     NULL);
1433 	if (status < 0) {
1434 		mlog_errno(status);
1435 		goto bail;
1436 	}
1437 
1438 	/* links can never be larger than one cluster so we know this
1439 	 * is all going to be contiguous, but do a sanity check
1440 	 * anyway. */
1441 	if ((p_blocks << sb->s_blocksize_bits) < bytes_left) {
1442 		status = -EIO;
1443 		mlog_errno(status);
1444 		goto bail;
1445 	}
1446 
1447 	virtual = 0;
1448 	while(bytes_left > 0) {
1449 		c = &symname[virtual * sb->s_blocksize];
1450 
1451 		bhs[virtual] = sb_getblk(sb, p_blkno);
1452 		if (!bhs[virtual]) {
1453 			status = -ENOMEM;
1454 			mlog_errno(status);
1455 			goto bail;
1456 		}
1457 		ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]);
1458 
1459 		status = ocfs2_journal_access(handle, inode, bhs[virtual],
1460 					      OCFS2_JOURNAL_ACCESS_CREATE);
1461 		if (status < 0) {
1462 			mlog_errno(status);
1463 			goto bail;
1464 		}
1465 
1466 		memset(bhs[virtual]->b_data, 0, sb->s_blocksize);
1467 
1468 		memcpy(bhs[virtual]->b_data, c,
1469 		       (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
1470 		       bytes_left);
1471 
1472 		status = ocfs2_journal_dirty(handle, bhs[virtual]);
1473 		if (status < 0) {
1474 			mlog_errno(status);
1475 			goto bail;
1476 		}
1477 
1478 		virtual++;
1479 		p_blkno++;
1480 		bytes_left -= sb->s_blocksize;
1481 	}
1482 
1483 	status = 0;
1484 bail:
1485 
1486 	if (bhs) {
1487 		for(i = 0; i < blocks; i++)
1488 			if (bhs[i])
1489 				brelse(bhs[i]);
1490 		kfree(bhs);
1491 	}
1492 
1493 	mlog_exit(status);
1494 	return status;
1495 }
1496 
1497 static int ocfs2_symlink(struct inode *dir,
1498 			 struct dentry *dentry,
1499 			 const char *symname)
1500 {
1501 	int status, l, credits;
1502 	u64 newsize;
1503 	struct ocfs2_super *osb = NULL;
1504 	struct inode *inode = NULL;
1505 	struct super_block *sb;
1506 	struct buffer_head *new_fe_bh = NULL;
1507 	struct buffer_head *de_bh = NULL;
1508 	struct buffer_head *parent_fe_bh = NULL;
1509 	struct ocfs2_dinode *fe = NULL;
1510 	struct ocfs2_dinode *dirfe;
1511 	handle_t *handle = NULL;
1512 	struct ocfs2_alloc_context *inode_ac = NULL;
1513 	struct ocfs2_alloc_context *data_ac = NULL;
1514 
1515 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1516 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
1517 
1518 	sb = dir->i_sb;
1519 	osb = OCFS2_SB(sb);
1520 
1521 	l = strlen(symname) + 1;
1522 
1523 	credits = ocfs2_calc_symlink_credits(sb);
1524 
1525 	/* lock the parent directory */
1526 	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
1527 	if (status < 0) {
1528 		if (status != -ENOENT)
1529 			mlog_errno(status);
1530 		return status;
1531 	}
1532 
1533 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
1534 	if (!dirfe->i_links_count) {
1535 		/* can't make a file in a deleted directory. */
1536 		status = -ENOENT;
1537 		goto bail;
1538 	}
1539 
1540 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
1541 					   dentry->d_name.len);
1542 	if (status)
1543 		goto bail;
1544 
1545 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
1546 					      dentry->d_name.name,
1547 					      dentry->d_name.len, &de_bh);
1548 	if (status < 0) {
1549 		mlog_errno(status);
1550 		goto bail;
1551 	}
1552 
1553 	status = ocfs2_reserve_new_inode(osb, &inode_ac);
1554 	if (status < 0) {
1555 		if (status != -ENOSPC)
1556 			mlog_errno(status);
1557 		goto bail;
1558 	}
1559 
1560 	/* don't reserve bitmap space for fast symlinks. */
1561 	if (l > ocfs2_fast_symlink_chars(sb)) {
1562 		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
1563 		if (status < 0) {
1564 			if (status != -ENOSPC)
1565 				mlog_errno(status);
1566 			goto bail;
1567 		}
1568 	}
1569 
1570 	handle = ocfs2_start_trans(osb, credits);
1571 	if (IS_ERR(handle)) {
1572 		status = PTR_ERR(handle);
1573 		handle = NULL;
1574 		mlog_errno(status);
1575 		goto bail;
1576 	}
1577 
1578 	status = ocfs2_mknod_locked(osb, dir, dentry,
1579 				    S_IFLNK | S_IRWXUGO, 0,
1580 				    &new_fe_bh, parent_fe_bh, handle,
1581 				    &inode, inode_ac);
1582 	if (status < 0) {
1583 		mlog_errno(status);
1584 		goto bail;
1585 	}
1586 
1587 	fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
1588 	inode->i_rdev = 0;
1589 	newsize = l - 1;
1590 	if (l > ocfs2_fast_symlink_chars(sb)) {
1591 		u32 offset = 0;
1592 
1593 		inode->i_op = &ocfs2_symlink_inode_operations;
1594 		status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0,
1595 						    new_fe_bh,
1596 						    handle, data_ac, NULL,
1597 						    NULL);
1598 		if (status < 0) {
1599 			if (status != -ENOSPC && status != -EINTR) {
1600 				mlog(ML_ERROR,
1601 				     "Failed to extend file to %llu\n",
1602 				     (unsigned long long)newsize);
1603 				mlog_errno(status);
1604 				status = -ENOSPC;
1605 			}
1606 			goto bail;
1607 		}
1608 		i_size_write(inode, newsize);
1609 		inode->i_blocks = ocfs2_inode_sector_count(inode);
1610 	} else {
1611 		inode->i_op = &ocfs2_fast_symlink_inode_operations;
1612 		memcpy((char *) fe->id2.i_symlink, symname, l);
1613 		i_size_write(inode, newsize);
1614 		inode->i_blocks = 0;
1615 	}
1616 
1617 	status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh);
1618 	if (status < 0) {
1619 		mlog_errno(status);
1620 		goto bail;
1621 	}
1622 
1623 	if (!ocfs2_inode_is_fast_symlink(inode)) {
1624 		status = ocfs2_create_symlink_data(osb, handle, inode,
1625 						   symname);
1626 		if (status < 0) {
1627 			mlog_errno(status);
1628 			goto bail;
1629 		}
1630 	}
1631 
1632 	status = ocfs2_add_entry(handle, dentry, inode,
1633 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1634 				 de_bh);
1635 	if (status < 0) {
1636 		mlog_errno(status);
1637 		goto bail;
1638 	}
1639 
1640 	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1641 	if (status) {
1642 		mlog_errno(status);
1643 		goto bail;
1644 	}
1645 
1646 	insert_inode_hash(inode);
1647 	dentry->d_op = &ocfs2_dentry_ops;
1648 	d_instantiate(dentry, inode);
1649 bail:
1650 	if (handle)
1651 		ocfs2_commit_trans(osb, handle);
1652 
1653 	ocfs2_meta_unlock(dir, 1);
1654 
1655 	if (new_fe_bh)
1656 		brelse(new_fe_bh);
1657 	if (parent_fe_bh)
1658 		brelse(parent_fe_bh);
1659 	if (de_bh)
1660 		brelse(de_bh);
1661 	if (inode_ac)
1662 		ocfs2_free_alloc_context(inode_ac);
1663 	if (data_ac)
1664 		ocfs2_free_alloc_context(data_ac);
1665 	if ((status < 0) && inode)
1666 		iput(inode);
1667 
1668 	mlog_exit(status);
1669 
1670 	return status;
1671 }
1672 
1673 static int ocfs2_blkno_stringify(u64 blkno, char *name)
1674 {
1675 	int status, namelen;
1676 
1677 	mlog_entry_void();
1678 
1679 	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
1680 			   (long long)blkno);
1681 	if (namelen <= 0) {
1682 		if (namelen)
1683 			status = namelen;
1684 		else
1685 			status = -EINVAL;
1686 		mlog_errno(status);
1687 		goto bail;
1688 	}
1689 	if (namelen != OCFS2_ORPHAN_NAMELEN) {
1690 		status = -EINVAL;
1691 		mlog_errno(status);
1692 		goto bail;
1693 	}
1694 
1695 	mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name,
1696 	     namelen);
1697 
1698 	status = 0;
1699 bail:
1700 	mlog_exit(status);
1701 	return status;
1702 }
1703 
1704 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
1705 				    struct inode **ret_orphan_dir,
1706 				    struct inode *inode,
1707 				    char *name,
1708 				    struct buffer_head **de_bh)
1709 {
1710 	struct inode *orphan_dir_inode;
1711 	struct buffer_head *orphan_dir_bh = NULL;
1712 	int status = 0;
1713 
1714 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
1715 	if (status < 0) {
1716 		mlog_errno(status);
1717 		return status;
1718 	}
1719 
1720 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
1721 						       ORPHAN_DIR_SYSTEM_INODE,
1722 						       osb->slot_num);
1723 	if (!orphan_dir_inode) {
1724 		status = -ENOENT;
1725 		mlog_errno(status);
1726 		return status;
1727 	}
1728 
1729 	mutex_lock(&orphan_dir_inode->i_mutex);
1730 
1731 	status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1);
1732 	if (status < 0) {
1733 		mlog_errno(status);
1734 		goto leave;
1735 	}
1736 
1737 	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
1738 					      orphan_dir_bh, name,
1739 					      OCFS2_ORPHAN_NAMELEN, de_bh);
1740 	if (status < 0) {
1741 		ocfs2_meta_unlock(orphan_dir_inode, 1);
1742 
1743 		mlog_errno(status);
1744 		goto leave;
1745 	}
1746 
1747 	*ret_orphan_dir = orphan_dir_inode;
1748 
1749 leave:
1750 	if (status) {
1751 		mutex_unlock(&orphan_dir_inode->i_mutex);
1752 		iput(orphan_dir_inode);
1753 	}
1754 
1755 	if (orphan_dir_bh)
1756 		brelse(orphan_dir_bh);
1757 
1758 	mlog_exit(status);
1759 	return status;
1760 }
1761 
1762 static int ocfs2_orphan_add(struct ocfs2_super *osb,
1763 			    handle_t *handle,
1764 			    struct inode *inode,
1765 			    struct ocfs2_dinode *fe,
1766 			    char *name,
1767 			    struct buffer_head *de_bh,
1768 			    struct inode *orphan_dir_inode)
1769 {
1770 	struct buffer_head *orphan_dir_bh = NULL;
1771 	int status = 0;
1772 	struct ocfs2_dinode *orphan_fe;
1773 
1774 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
1775 
1776 	status = ocfs2_read_block(osb,
1777 				  OCFS2_I(orphan_dir_inode)->ip_blkno,
1778 				  &orphan_dir_bh, OCFS2_BH_CACHED,
1779 				  orphan_dir_inode);
1780 	if (status < 0) {
1781 		mlog_errno(status);
1782 		goto leave;
1783 	}
1784 
1785 	status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
1786 				      OCFS2_JOURNAL_ACCESS_WRITE);
1787 	if (status < 0) {
1788 		mlog_errno(status);
1789 		goto leave;
1790 	}
1791 
1792 	/* we're a cluster, and nlink can change on disk from
1793 	 * underneath us... */
1794 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
1795 	if (S_ISDIR(inode->i_mode))
1796 		le16_add_cpu(&orphan_fe->i_links_count, 1);
1797 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
1798 
1799 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
1800 	if (status < 0) {
1801 		mlog_errno(status);
1802 		goto leave;
1803 	}
1804 
1805 	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
1806 				   OCFS2_ORPHAN_NAMELEN, inode,
1807 				   OCFS2_I(inode)->ip_blkno,
1808 				   orphan_dir_bh, de_bh);
1809 	if (status < 0) {
1810 		mlog_errno(status);
1811 		goto leave;
1812 	}
1813 
1814 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
1815 
1816 	/* Record which orphan dir our inode now resides
1817 	 * in. delete_inode will use this to determine which orphan
1818 	 * dir to lock. */
1819 	fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
1820 
1821 	mlog(0, "Inode %llu orphaned in slot %d\n",
1822 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
1823 
1824 leave:
1825 	if (orphan_dir_bh)
1826 		brelse(orphan_dir_bh);
1827 
1828 	mlog_exit(status);
1829 	return status;
1830 }
1831 
1832 /* unlike orphan_add, we expect the orphan dir to already be locked here. */
1833 int ocfs2_orphan_del(struct ocfs2_super *osb,
1834 		     handle_t *handle,
1835 		     struct inode *orphan_dir_inode,
1836 		     struct inode *inode,
1837 		     struct buffer_head *orphan_dir_bh)
1838 {
1839 	char name[OCFS2_ORPHAN_NAMELEN + 1];
1840 	struct ocfs2_dinode *orphan_fe;
1841 	int status = 0;
1842 	struct buffer_head *target_de_bh = NULL;
1843 	struct ocfs2_dir_entry *target_de = NULL;
1844 
1845 	mlog_entry_void();
1846 
1847 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
1848 	if (status < 0) {
1849 		mlog_errno(status);
1850 		goto leave;
1851 	}
1852 
1853 	mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n",
1854 	     name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
1855 	     OCFS2_ORPHAN_NAMELEN);
1856 
1857 	/* find it's spot in the orphan directory */
1858 	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
1859 					orphan_dir_inode, &target_de);
1860 	if (!target_de_bh) {
1861 		status = -ENOENT;
1862 		mlog_errno(status);
1863 		goto leave;
1864 	}
1865 
1866 	/* remove it from the orphan directory */
1867 	status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de,
1868 				    target_de_bh);
1869 	if (status < 0) {
1870 		mlog_errno(status);
1871 		goto leave;
1872 	}
1873 
1874 	status = ocfs2_journal_access(handle,orphan_dir_inode,  orphan_dir_bh,
1875 				      OCFS2_JOURNAL_ACCESS_WRITE);
1876 	if (status < 0) {
1877 		mlog_errno(status);
1878 		goto leave;
1879 	}
1880 
1881 	/* do the i_nlink dance! :) */
1882 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
1883 	if (S_ISDIR(inode->i_mode))
1884 		le16_add_cpu(&orphan_fe->i_links_count, -1);
1885 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
1886 
1887 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
1888 	if (status < 0) {
1889 		mlog_errno(status);
1890 		goto leave;
1891 	}
1892 
1893 leave:
1894 	if (target_de_bh)
1895 		brelse(target_de_bh);
1896 
1897 	mlog_exit(status);
1898 	return status;
1899 }
1900 
1901 const struct inode_operations ocfs2_dir_iops = {
1902 	.create		= ocfs2_create,
1903 	.lookup		= ocfs2_lookup,
1904 	.link		= ocfs2_link,
1905 	.unlink		= ocfs2_unlink,
1906 	.rmdir		= ocfs2_unlink,
1907 	.symlink	= ocfs2_symlink,
1908 	.mkdir		= ocfs2_mkdir,
1909 	.mknod		= ocfs2_mknod,
1910 	.rename		= ocfs2_rename,
1911 	.setattr	= ocfs2_setattr,
1912 	.getattr	= ocfs2_getattr,
1913 	.permission	= ocfs2_permission,
1914 };
1915