xref: /linux/fs/ocfs2/namei.c (revision 367b8112fe2ea5c39a7bb4d263dcdd9b612fae18)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * namei.c
5  *
6  * Create and rename file, directory, symlinks
7  *
8  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9  *
10  *  Portions of this code from linux/fs/ext3/dir.c
11  *
12  *  Copyright (C) 1992, 1993, 1994, 1995
13  *  Remy Card (card@masi.ibp.fr)
14  *  Laboratoire MASI - Institut Blaise pascal
15  *  Universite Pierre et Marie Curie (Paris VI)
16  *
17  *   from
18  *
19  *   linux/fs/minix/dir.c
20  *
21  *   Copyright (C) 1991, 1992 Linus Torvalds
22  *
23  * This program is free software; you can redistribute it and/or
24  * modify it under the terms of the GNU General Public
25  * License as published by the Free Software Foundation; either
26  * version 2 of the License, or (at your option) any later version.
27  *
28  * This program is distributed in the hope that it will be useful,
29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
31  * General Public License for more details.
32  *
33  * You should have received a copy of the GNU General Public
34  * License along with this program; if not, write to the
35  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
36  * Boston, MA 021110-1307, USA.
37  */
38 
39 #include <linux/fs.h>
40 #include <linux/types.h>
41 #include <linux/slab.h>
42 #include <linux/highmem.h>
43 
44 #define MLOG_MASK_PREFIX ML_NAMEI
45 #include <cluster/masklog.h>
46 
47 #include "ocfs2.h"
48 
49 #include "alloc.h"
50 #include "dcache.h"
51 #include "dir.h"
52 #include "dlmglue.h"
53 #include "extent_map.h"
54 #include "file.h"
55 #include "inode.h"
56 #include "journal.h"
57 #include "namei.h"
58 #include "suballoc.h"
59 #include "super.h"
60 #include "symlink.h"
61 #include "sysfile.h"
62 #include "uptodate.h"
63 #include "xattr.h"
64 
65 #include "buffer_head_io.h"
66 
67 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
68 			      struct inode *dir,
69 			      struct dentry *dentry, int mode,
70 			      dev_t dev,
71 			      struct buffer_head **new_fe_bh,
72 			      struct buffer_head *parent_fe_bh,
73 			      handle_t *handle,
74 			      struct inode **ret_inode,
75 			      struct ocfs2_alloc_context *inode_ac);
76 
77 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
78 				    struct inode **ret_orphan_dir,
79 				    struct inode *inode,
80 				    char *name,
81 				    struct buffer_head **de_bh);
82 
83 static int ocfs2_orphan_add(struct ocfs2_super *osb,
84 			    handle_t *handle,
85 			    struct inode *inode,
86 			    struct ocfs2_dinode *fe,
87 			    char *name,
88 			    struct buffer_head *de_bh,
89 			    struct inode *orphan_dir_inode);
90 
91 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
92 				     handle_t *handle,
93 				     struct inode *inode,
94 				     const char *symname);
95 
96 /* An orphan dir name is an 8 byte value, printed as a hex string */
97 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
98 
99 static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
100 				   struct nameidata *nd)
101 {
102 	int status;
103 	u64 blkno;
104 	struct inode *inode = NULL;
105 	struct dentry *ret;
106 	struct ocfs2_inode_info *oi;
107 
108 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
109 		   dentry->d_name.len, dentry->d_name.name);
110 
111 	if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
112 		ret = ERR_PTR(-ENAMETOOLONG);
113 		goto bail;
114 	}
115 
116 	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
117 	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
118 
119 	status = ocfs2_inode_lock(dir, NULL, 0);
120 	if (status < 0) {
121 		if (status != -ENOENT)
122 			mlog_errno(status);
123 		ret = ERR_PTR(status);
124 		goto bail;
125 	}
126 
127 	status = ocfs2_lookup_ino_from_name(dir, dentry->d_name.name,
128 					    dentry->d_name.len, &blkno);
129 	if (status < 0)
130 		goto bail_add;
131 
132 	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0);
133 	if (IS_ERR(inode)) {
134 		ret = ERR_PTR(-EACCES);
135 		goto bail_unlock;
136 	}
137 
138 	oi = OCFS2_I(inode);
139 	/* Clear any orphaned state... If we were able to look up the
140 	 * inode from a directory, it certainly can't be orphaned. We
141 	 * might have the bad state from a node which intended to
142 	 * orphan this inode but crashed before it could commit the
143 	 * unlink. */
144 	spin_lock(&oi->ip_lock);
145 	oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
146 	spin_unlock(&oi->ip_lock);
147 
148 bail_add:
149 	dentry->d_op = &ocfs2_dentry_ops;
150 	ret = d_splice_alias(inode, dentry);
151 
152 	if (inode) {
153 		/*
154 		 * If d_splice_alias() finds a DCACHE_DISCONNECTED
155 		 * dentry, it will d_move() it on top of ourse. The
156 		 * return value will indicate this however, so in
157 		 * those cases, we switch them around for the locking
158 		 * code.
159 		 *
160 		 * NOTE: This dentry already has ->d_op set from
161 		 * ocfs2_get_parent() and ocfs2_get_dentry()
162 		 */
163 		if (ret)
164 			dentry = ret;
165 
166 		status = ocfs2_dentry_attach_lock(dentry, inode,
167 						  OCFS2_I(dir)->ip_blkno);
168 		if (status) {
169 			mlog_errno(status);
170 			ret = ERR_PTR(status);
171 			goto bail_unlock;
172 		}
173 	}
174 
175 bail_unlock:
176 	/* Don't drop the cluster lock until *after* the d_add --
177 	 * unlink on another node will message us to remove that
178 	 * dentry under this lock so otherwise we can race this with
179 	 * the downconvert thread and have a stale dentry. */
180 	ocfs2_inode_unlock(dir, 0);
181 
182 bail:
183 
184 	mlog_exit_ptr(ret);
185 
186 	return ret;
187 }
188 
189 static int ocfs2_mknod(struct inode *dir,
190 		       struct dentry *dentry,
191 		       int mode,
192 		       dev_t dev)
193 {
194 	int status = 0;
195 	struct buffer_head *parent_fe_bh = NULL;
196 	handle_t *handle = NULL;
197 	struct ocfs2_super *osb;
198 	struct ocfs2_dinode *dirfe;
199 	struct buffer_head *new_fe_bh = NULL;
200 	struct buffer_head *de_bh = NULL;
201 	struct inode *inode = NULL;
202 	struct ocfs2_alloc_context *inode_ac = NULL;
203 	struct ocfs2_alloc_context *data_ac = NULL;
204 
205 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
206 		   (unsigned long)dev, dentry->d_name.len,
207 		   dentry->d_name.name);
208 
209 	/* get our super block */
210 	osb = OCFS2_SB(dir->i_sb);
211 
212 	status = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
213 	if (status < 0) {
214 		if (status != -ENOENT)
215 			mlog_errno(status);
216 		return status;
217 	}
218 
219 	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
220 		status = -EMLINK;
221 		goto leave;
222 	}
223 
224 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
225 	if (!dirfe->i_links_count) {
226 		/* can't make a file in a deleted directory. */
227 		status = -ENOENT;
228 		goto leave;
229 	}
230 
231 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
232 					   dentry->d_name.len);
233 	if (status)
234 		goto leave;
235 
236 	/* get a spot inside the dir. */
237 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
238 					      dentry->d_name.name,
239 					      dentry->d_name.len, &de_bh);
240 	if (status < 0) {
241 		mlog_errno(status);
242 		goto leave;
243 	}
244 
245 	/* reserve an inode spot */
246 	status = ocfs2_reserve_new_inode(osb, &inode_ac);
247 	if (status < 0) {
248 		if (status != -ENOSPC)
249 			mlog_errno(status);
250 		goto leave;
251 	}
252 
253 	/* Reserve a cluster if creating an extent based directory. */
254 	if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
255 		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
256 		if (status < 0) {
257 			if (status != -ENOSPC)
258 				mlog_errno(status);
259 			goto leave;
260 		}
261 	}
262 
263 	handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS);
264 	if (IS_ERR(handle)) {
265 		status = PTR_ERR(handle);
266 		handle = NULL;
267 		mlog_errno(status);
268 		goto leave;
269 	}
270 
271 	/* do the real work now. */
272 	status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
273 				    &new_fe_bh, parent_fe_bh, handle,
274 				    &inode, inode_ac);
275 	if (status < 0) {
276 		mlog_errno(status);
277 		goto leave;
278 	}
279 
280 	if (S_ISDIR(mode)) {
281 		status = ocfs2_fill_new_dir(osb, handle, dir, inode,
282 					    new_fe_bh, data_ac);
283 		if (status < 0) {
284 			mlog_errno(status);
285 			goto leave;
286 		}
287 
288 		status = ocfs2_journal_access(handle, dir, parent_fe_bh,
289 					      OCFS2_JOURNAL_ACCESS_WRITE);
290 		if (status < 0) {
291 			mlog_errno(status);
292 			goto leave;
293 		}
294 		le16_add_cpu(&dirfe->i_links_count, 1);
295 		status = ocfs2_journal_dirty(handle, parent_fe_bh);
296 		if (status < 0) {
297 			mlog_errno(status);
298 			goto leave;
299 		}
300 		inc_nlink(dir);
301 	}
302 
303 	status = ocfs2_add_entry(handle, dentry, inode,
304 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
305 				 de_bh);
306 	if (status < 0) {
307 		mlog_errno(status);
308 		goto leave;
309 	}
310 
311 	status = ocfs2_dentry_attach_lock(dentry, inode,
312 					  OCFS2_I(dir)->ip_blkno);
313 	if (status) {
314 		mlog_errno(status);
315 		goto leave;
316 	}
317 
318 	insert_inode_hash(inode);
319 	dentry->d_op = &ocfs2_dentry_ops;
320 	d_instantiate(dentry, inode);
321 	status = 0;
322 leave:
323 	if (handle)
324 		ocfs2_commit_trans(osb, handle);
325 
326 	ocfs2_inode_unlock(dir, 1);
327 
328 	if (status == -ENOSPC)
329 		mlog(0, "Disk is full\n");
330 
331 	brelse(new_fe_bh);
332 	brelse(de_bh);
333 	brelse(parent_fe_bh);
334 
335 	if ((status < 0) && inode)
336 		iput(inode);
337 
338 	if (inode_ac)
339 		ocfs2_free_alloc_context(inode_ac);
340 
341 	if (data_ac)
342 		ocfs2_free_alloc_context(data_ac);
343 
344 	mlog_exit(status);
345 
346 	return status;
347 }
348 
349 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
350 			      struct inode *dir,
351 			      struct dentry *dentry, int mode,
352 			      dev_t dev,
353 			      struct buffer_head **new_fe_bh,
354 			      struct buffer_head *parent_fe_bh,
355 			      handle_t *handle,
356 			      struct inode **ret_inode,
357 			      struct ocfs2_alloc_context *inode_ac)
358 {
359 	int status = 0;
360 	struct ocfs2_dinode *fe = NULL;
361 	struct ocfs2_extent_list *fel;
362 	u64 fe_blkno = 0;
363 	u16 suballoc_bit;
364 	struct inode *inode = NULL;
365 
366 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
367 		   (unsigned long)dev, dentry->d_name.len,
368 		   dentry->d_name.name);
369 
370 	*new_fe_bh = NULL;
371 	*ret_inode = NULL;
372 
373 	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
374 				       &fe_blkno);
375 	if (status < 0) {
376 		mlog_errno(status);
377 		goto leave;
378 	}
379 
380 	inode = new_inode(dir->i_sb);
381 	if (IS_ERR(inode)) {
382 		status = PTR_ERR(inode);
383 		mlog(ML_ERROR, "new_inode failed!\n");
384 		goto leave;
385 	}
386 
387 	/* populate as many fields early on as possible - many of
388 	 * these are used by the support functions here and in
389 	 * callers. */
390 	inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
391 	OCFS2_I(inode)->ip_blkno = fe_blkno;
392 	if (S_ISDIR(mode))
393 		inode->i_nlink = 2;
394 	else
395 		inode->i_nlink = 1;
396 	inode->i_mode = mode;
397 	spin_lock(&osb->osb_lock);
398 	inode->i_generation = osb->s_next_generation++;
399 	spin_unlock(&osb->osb_lock);
400 
401 	*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
402 	if (!*new_fe_bh) {
403 		status = -EIO;
404 		mlog_errno(status);
405 		goto leave;
406 	}
407 	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
408 
409 	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
410 				      OCFS2_JOURNAL_ACCESS_CREATE);
411 	if (status < 0) {
412 		mlog_errno(status);
413 		goto leave;
414 	}
415 
416 	fe = (struct ocfs2_dinode *) (*new_fe_bh)->b_data;
417 	memset(fe, 0, osb->sb->s_blocksize);
418 
419 	fe->i_generation = cpu_to_le32(inode->i_generation);
420 	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
421 	fe->i_blkno = cpu_to_le64(fe_blkno);
422 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
423 	fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
424 	fe->i_uid = cpu_to_le32(current->fsuid);
425 	if (dir->i_mode & S_ISGID) {
426 		fe->i_gid = cpu_to_le32(dir->i_gid);
427 		if (S_ISDIR(mode))
428 			mode |= S_ISGID;
429 	} else
430 		fe->i_gid = cpu_to_le32(current->fsgid);
431 	fe->i_mode = cpu_to_le16(mode);
432 	if (S_ISCHR(mode) || S_ISBLK(mode))
433 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
434 
435 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
436 
437 	fe->i_last_eb_blk = 0;
438 	strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
439 	le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL);
440 	fe->i_atime = fe->i_ctime = fe->i_mtime =
441 		cpu_to_le64(CURRENT_TIME.tv_sec);
442 	fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
443 		cpu_to_le32(CURRENT_TIME.tv_nsec);
444 	fe->i_dtime = 0;
445 
446 	/*
447 	 * If supported, directories start with inline data.
448 	 */
449 	if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
450 		u16 feat = le16_to_cpu(fe->i_dyn_features);
451 
452 		fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
453 
454 		fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
455 	} else {
456 		fel = &fe->id2.i_list;
457 		fel->l_tree_depth = 0;
458 		fel->l_next_free_rec = 0;
459 		fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
460 	}
461 
462 	status = ocfs2_journal_dirty(handle, *new_fe_bh);
463 	if (status < 0) {
464 		mlog_errno(status);
465 		goto leave;
466 	}
467 
468 	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
469 		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
470 		     "i_blkno=%llu, i_ino=%lu\n",
471 		     (unsigned long long)(*new_fe_bh)->b_blocknr,
472 		     (unsigned long long)le64_to_cpu(fe->i_blkno),
473 		     inode->i_ino);
474 		BUG();
475 	}
476 
477 	ocfs2_inode_set_new(osb, inode);
478 	if (!ocfs2_mount_local(osb)) {
479 		status = ocfs2_create_new_inode_locks(inode);
480 		if (status < 0)
481 			mlog_errno(status);
482 	}
483 
484 	status = 0; /* error in ocfs2_create_new_inode_locks is not
485 		     * critical */
486 
487 	*ret_inode = inode;
488 leave:
489 	if (status < 0) {
490 		if (*new_fe_bh) {
491 			brelse(*new_fe_bh);
492 			*new_fe_bh = NULL;
493 		}
494 		if (inode)
495 			iput(inode);
496 	}
497 
498 	mlog_exit(status);
499 	return status;
500 }
501 
502 static int ocfs2_mkdir(struct inode *dir,
503 		       struct dentry *dentry,
504 		       int mode)
505 {
506 	int ret;
507 
508 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
509 		   dentry->d_name.len, dentry->d_name.name);
510 	ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
511 	mlog_exit(ret);
512 
513 	return ret;
514 }
515 
516 static int ocfs2_create(struct inode *dir,
517 			struct dentry *dentry,
518 			int mode,
519 			struct nameidata *nd)
520 {
521 	int ret;
522 
523 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
524 		   dentry->d_name.len, dentry->d_name.name);
525 	ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
526 	mlog_exit(ret);
527 
528 	return ret;
529 }
530 
531 static int ocfs2_link(struct dentry *old_dentry,
532 		      struct inode *dir,
533 		      struct dentry *dentry)
534 {
535 	handle_t *handle;
536 	struct inode *inode = old_dentry->d_inode;
537 	int err;
538 	struct buffer_head *fe_bh = NULL;
539 	struct buffer_head *parent_fe_bh = NULL;
540 	struct buffer_head *de_bh = NULL;
541 	struct ocfs2_dinode *fe = NULL;
542 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
543 
544 	mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
545 		   old_dentry->d_name.len, old_dentry->d_name.name,
546 		   dentry->d_name.len, dentry->d_name.name);
547 
548 	if (S_ISDIR(inode->i_mode))
549 		return -EPERM;
550 
551 	err = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
552 	if (err < 0) {
553 		if (err != -ENOENT)
554 			mlog_errno(err);
555 		return err;
556 	}
557 
558 	if (!dir->i_nlink) {
559 		err = -ENOENT;
560 		goto out;
561 	}
562 
563 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
564 					dentry->d_name.len);
565 	if (err)
566 		goto out;
567 
568 	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
569 					   dentry->d_name.name,
570 					   dentry->d_name.len, &de_bh);
571 	if (err < 0) {
572 		mlog_errno(err);
573 		goto out;
574 	}
575 
576 	err = ocfs2_inode_lock(inode, &fe_bh, 1);
577 	if (err < 0) {
578 		if (err != -ENOENT)
579 			mlog_errno(err);
580 		goto out;
581 	}
582 
583 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
584 	if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
585 		err = -EMLINK;
586 		goto out_unlock_inode;
587 	}
588 
589 	handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS);
590 	if (IS_ERR(handle)) {
591 		err = PTR_ERR(handle);
592 		handle = NULL;
593 		mlog_errno(err);
594 		goto out_unlock_inode;
595 	}
596 
597 	err = ocfs2_journal_access(handle, inode, fe_bh,
598 				   OCFS2_JOURNAL_ACCESS_WRITE);
599 	if (err < 0) {
600 		mlog_errno(err);
601 		goto out_commit;
602 	}
603 
604 	inc_nlink(inode);
605 	inode->i_ctime = CURRENT_TIME;
606 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
607 	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
608 	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
609 
610 	err = ocfs2_journal_dirty(handle, fe_bh);
611 	if (err < 0) {
612 		le16_add_cpu(&fe->i_links_count, -1);
613 		drop_nlink(inode);
614 		mlog_errno(err);
615 		goto out_commit;
616 	}
617 
618 	err = ocfs2_add_entry(handle, dentry, inode,
619 			      OCFS2_I(inode)->ip_blkno,
620 			      parent_fe_bh, de_bh);
621 	if (err) {
622 		le16_add_cpu(&fe->i_links_count, -1);
623 		drop_nlink(inode);
624 		mlog_errno(err);
625 		goto out_commit;
626 	}
627 
628 	err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
629 	if (err) {
630 		mlog_errno(err);
631 		goto out_commit;
632 	}
633 
634 	atomic_inc(&inode->i_count);
635 	dentry->d_op = &ocfs2_dentry_ops;
636 	d_instantiate(dentry, inode);
637 
638 out_commit:
639 	ocfs2_commit_trans(osb, handle);
640 out_unlock_inode:
641 	ocfs2_inode_unlock(inode, 1);
642 
643 out:
644 	ocfs2_inode_unlock(dir, 1);
645 
646 	brelse(de_bh);
647 	brelse(fe_bh);
648 	brelse(parent_fe_bh);
649 
650 	mlog_exit(err);
651 
652 	return err;
653 }
654 
/*
 * Acquire an exclusive lock on @dentry and release it again right
 * away; taking it forces other nodes to drop the dentry.
 *
 * Returns 0 on success, or the error from ocfs2_dentry_lock().
 */
static int ocfs2_remote_dentry_delete(struct dentry *dentry)
{
	int status = ocfs2_dentry_lock(dentry, 1);

	if (status) {
		mlog_errno(status);
		return status;
	}

	ocfs2_dentry_unlock(dentry, 1);
	return 0;
}
671 
672 static inline int inode_is_unlinkable(struct inode *inode)
673 {
674 	if (S_ISDIR(inode->i_mode)) {
675 		if (inode->i_nlink == 2)
676 			return 1;
677 		return 0;
678 	}
679 
680 	if (inode->i_nlink == 1)
681 		return 1;
682 	return 0;
683 }
684 
685 static int ocfs2_unlink(struct inode *dir,
686 			struct dentry *dentry)
687 {
688 	int status;
689 	int child_locked = 0;
690 	struct inode *inode = dentry->d_inode;
691 	struct inode *orphan_dir = NULL;
692 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
693 	u64 blkno;
694 	struct ocfs2_dinode *fe = NULL;
695 	struct buffer_head *fe_bh = NULL;
696 	struct buffer_head *parent_node_bh = NULL;
697 	handle_t *handle = NULL;
698 	struct ocfs2_dir_entry *dirent = NULL;
699 	struct buffer_head *dirent_bh = NULL;
700 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
701 	struct buffer_head *orphan_entry_bh = NULL;
702 
703 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
704 		   dentry->d_name.len, dentry->d_name.name);
705 
706 	BUG_ON(dentry->d_parent->d_inode != dir);
707 
708 	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
709 
710 	if (inode == osb->root_inode) {
711 		mlog(0, "Cannot delete the root directory\n");
712 		return -EPERM;
713 	}
714 
715 	status = ocfs2_inode_lock(dir, &parent_node_bh, 1);
716 	if (status < 0) {
717 		if (status != -ENOENT)
718 			mlog_errno(status);
719 		return status;
720 	}
721 
722 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
723 					  dentry->d_name.len, &blkno,
724 					  dir, &dirent_bh, &dirent);
725 	if (status < 0) {
726 		if (status != -ENOENT)
727 			mlog_errno(status);
728 		goto leave;
729 	}
730 
731 	if (OCFS2_I(inode)->ip_blkno != blkno) {
732 		status = -ENOENT;
733 
734 		mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n",
735 		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
736 		     (unsigned long long)blkno, OCFS2_I(inode)->ip_flags);
737 		goto leave;
738 	}
739 
740 	status = ocfs2_inode_lock(inode, &fe_bh, 1);
741 	if (status < 0) {
742 		if (status != -ENOENT)
743 			mlog_errno(status);
744 		goto leave;
745 	}
746 	child_locked = 1;
747 
748 	if (S_ISDIR(inode->i_mode)) {
749 	       	if (!ocfs2_empty_dir(inode)) {
750 			status = -ENOTEMPTY;
751 			goto leave;
752 		} else if (inode->i_nlink != 2) {
753 			status = -ENOTEMPTY;
754 			goto leave;
755 		}
756 	}
757 
758 	status = ocfs2_remote_dentry_delete(dentry);
759 	if (status < 0) {
760 		/* This remote delete should succeed under all normal
761 		 * circumstances. */
762 		mlog_errno(status);
763 		goto leave;
764 	}
765 
766 	if (inode_is_unlinkable(inode)) {
767 		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode,
768 						  orphan_name,
769 						  &orphan_entry_bh);
770 		if (status < 0) {
771 			mlog_errno(status);
772 			goto leave;
773 		}
774 	}
775 
776 	handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS);
777 	if (IS_ERR(handle)) {
778 		status = PTR_ERR(handle);
779 		handle = NULL;
780 		mlog_errno(status);
781 		goto leave;
782 	}
783 
784 	status = ocfs2_journal_access(handle, inode, fe_bh,
785 				      OCFS2_JOURNAL_ACCESS_WRITE);
786 	if (status < 0) {
787 		mlog_errno(status);
788 		goto leave;
789 	}
790 
791 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
792 
793 	if (inode_is_unlinkable(inode)) {
794 		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
795 					  orphan_entry_bh, orphan_dir);
796 		if (status < 0) {
797 			mlog_errno(status);
798 			goto leave;
799 		}
800 	}
801 
802 	/* delete the name from the parent dir */
803 	status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh);
804 	if (status < 0) {
805 		mlog_errno(status);
806 		goto leave;
807 	}
808 
809 	if (S_ISDIR(inode->i_mode))
810 		drop_nlink(inode);
811 	drop_nlink(inode);
812 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
813 
814 	status = ocfs2_journal_dirty(handle, fe_bh);
815 	if (status < 0) {
816 		mlog_errno(status);
817 		goto leave;
818 	}
819 
820 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
821 	if (S_ISDIR(inode->i_mode))
822 		drop_nlink(dir);
823 
824 	status = ocfs2_mark_inode_dirty(handle, dir, parent_node_bh);
825 	if (status < 0) {
826 		mlog_errno(status);
827 		if (S_ISDIR(inode->i_mode))
828 			inc_nlink(dir);
829 	}
830 
831 leave:
832 	if (handle)
833 		ocfs2_commit_trans(osb, handle);
834 
835 	if (child_locked)
836 		ocfs2_inode_unlock(inode, 1);
837 
838 	ocfs2_inode_unlock(dir, 1);
839 
840 	if (orphan_dir) {
841 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
842 		ocfs2_inode_unlock(orphan_dir, 1);
843 		mutex_unlock(&orphan_dir->i_mutex);
844 		iput(orphan_dir);
845 	}
846 
847 	brelse(fe_bh);
848 	brelse(dirent_bh);
849 	brelse(parent_node_bh);
850 	brelse(orphan_entry_bh);
851 
852 	mlog_exit(status);
853 
854 	return status;
855 }
856 
857 /*
858  * The only place this should be used is rename!
859  * if they have the same id, then the 1st one is the only one locked.
860  */
861 static int ocfs2_double_lock(struct ocfs2_super *osb,
862 			     struct buffer_head **bh1,
863 			     struct inode *inode1,
864 			     struct buffer_head **bh2,
865 			     struct inode *inode2)
866 {
867 	int status;
868 	struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
869 	struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
870 	struct buffer_head **tmpbh;
871 	struct inode *tmpinode;
872 
873 	mlog_entry("(inode1 = %llu, inode2 = %llu)\n",
874 		   (unsigned long long)oi1->ip_blkno,
875 		   (unsigned long long)oi2->ip_blkno);
876 
877 	if (*bh1)
878 		*bh1 = NULL;
879 	if (*bh2)
880 		*bh2 = NULL;
881 
882 	/* we always want to lock the one with the lower lockid first. */
883 	if (oi1->ip_blkno != oi2->ip_blkno) {
884 		if (oi1->ip_blkno < oi2->ip_blkno) {
885 			/* switch id1 and id2 around */
886 			mlog(0, "switching them around...\n");
887 			tmpbh = bh2;
888 			bh2 = bh1;
889 			bh1 = tmpbh;
890 
891 			tmpinode = inode2;
892 			inode2 = inode1;
893 			inode1 = tmpinode;
894 		}
895 		/* lock id2 */
896 		status = ocfs2_inode_lock(inode2, bh2, 1);
897 		if (status < 0) {
898 			if (status != -ENOENT)
899 				mlog_errno(status);
900 			goto bail;
901 		}
902 	}
903 
904 	/* lock id1 */
905 	status = ocfs2_inode_lock(inode1, bh1, 1);
906 	if (status < 0) {
907 		/*
908 		 * An error return must mean that no cluster locks
909 		 * were held on function exit.
910 		 */
911 		if (oi1->ip_blkno != oi2->ip_blkno)
912 			ocfs2_inode_unlock(inode2, 1);
913 
914 		if (status != -ENOENT)
915 			mlog_errno(status);
916 	}
917 
918 bail:
919 	mlog_exit(status);
920 	return status;
921 }
922 
/*
 * Drop the cluster lock on @inode1 and, unless it is the very same
 * inode, on @inode2 as well.
 */
static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
{
	ocfs2_inode_unlock(inode1, 1);

	if (inode1 == inode2)
		return;

	ocfs2_inode_unlock(inode2, 1);
}
930 
931 static int ocfs2_rename(struct inode *old_dir,
932 			struct dentry *old_dentry,
933 			struct inode *new_dir,
934 			struct dentry *new_dentry)
935 {
936 	int status = 0, rename_lock = 0, parents_locked = 0;
937 	int old_child_locked = 0, new_child_locked = 0;
938 	struct inode *old_inode = old_dentry->d_inode;
939 	struct inode *new_inode = new_dentry->d_inode;
940 	struct inode *orphan_dir = NULL;
941 	struct ocfs2_dinode *newfe = NULL;
942 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
943 	struct buffer_head *orphan_entry_bh = NULL;
944 	struct buffer_head *newfe_bh = NULL;
945 	struct buffer_head *old_inode_bh = NULL;
946 	struct buffer_head *insert_entry_bh = NULL;
947 	struct ocfs2_super *osb = NULL;
948 	u64 newfe_blkno, old_de_ino;
949 	handle_t *handle = NULL;
950 	struct buffer_head *old_dir_bh = NULL;
951 	struct buffer_head *new_dir_bh = NULL;
952 	struct ocfs2_dir_entry *old_inode_dot_dot_de = NULL, *old_de = NULL,
953 		*new_de = NULL;
954 	struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
955 	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
956 						    // this is the 1st dirent bh
957 	nlink_t old_dir_nlink = old_dir->i_nlink;
958 	struct ocfs2_dinode *old_di;
959 
960 	/* At some point it might be nice to break this function up a
961 	 * bit. */
962 
963 	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.*s' to='%.*s')\n",
964 		   old_dir, old_dentry, new_dir, new_dentry,
965 		   old_dentry->d_name.len, old_dentry->d_name.name,
966 		   new_dentry->d_name.len, new_dentry->d_name.name);
967 
968 	osb = OCFS2_SB(old_dir->i_sb);
969 
970 	if (new_inode) {
971 		if (!igrab(new_inode))
972 			BUG();
973 	}
974 
975 	/* Assume a directory hierarchy thusly:
976 	 * a/b/c
977 	 * a/d
978 	 * a,b,c, and d are all directories.
979 	 *
980 	 * from cwd of 'a' on both nodes:
981 	 * node1: mv b/c d
982 	 * node2: mv d   b/c
983 	 *
984 	 * And that's why, just like the VFS, we need a file system
985 	 * rename lock. */
986 	if (old_dir != new_dir && S_ISDIR(old_inode->i_mode)) {
987 		status = ocfs2_rename_lock(osb);
988 		if (status < 0) {
989 			mlog_errno(status);
990 			goto bail;
991 		}
992 		rename_lock = 1;
993 	}
994 
995 	/* if old and new are the same, this'll just do one lock. */
996 	status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
997 				   &new_dir_bh, new_dir);
998 	if (status < 0) {
999 		mlog_errno(status);
1000 		goto bail;
1001 	}
1002 	parents_locked = 1;
1003 
1004 	/* make sure both dirs have bhs
1005 	 * get an extra ref on old_dir_bh if old==new */
1006 	if (!new_dir_bh) {
1007 		if (old_dir_bh) {
1008 			new_dir_bh = old_dir_bh;
1009 			get_bh(new_dir_bh);
1010 		} else {
1011 			mlog(ML_ERROR, "no old_dir_bh!\n");
1012 			status = -EIO;
1013 			goto bail;
1014 		}
1015 	}
1016 
1017 	/*
1018 	 * Aside from allowing a meta data update, the locking here
1019 	 * also ensures that the downconvert thread on other nodes
1020 	 * won't have to concurrently downconvert the inode and the
1021 	 * dentry locks.
1022 	 */
1023 	status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1);
1024 	if (status < 0) {
1025 		if (status != -ENOENT)
1026 			mlog_errno(status);
1027 		goto bail;
1028 	}
1029 	old_child_locked = 1;
1030 
1031 	status = ocfs2_remote_dentry_delete(old_dentry);
1032 	if (status < 0) {
1033 		mlog_errno(status);
1034 		goto bail;
1035 	}
1036 
1037 	if (S_ISDIR(old_inode->i_mode)) {
1038 		u64 old_inode_parent;
1039 
1040 		status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
1041 						  old_inode, &old_inode_de_bh,
1042 						  &old_inode_dot_dot_de);
1043 		if (status) {
1044 			status = -EIO;
1045 			goto bail;
1046 		}
1047 
1048 		if (old_inode_parent != OCFS2_I(old_dir)->ip_blkno) {
1049 			status = -EIO;
1050 			goto bail;
1051 		}
1052 
1053 		if (!new_inode && new_dir != old_dir &&
1054 		    new_dir->i_nlink >= OCFS2_LINK_MAX) {
1055 			status = -EMLINK;
1056 			goto bail;
1057 		}
1058 	}
1059 
1060 	status = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
1061 					    old_dentry->d_name.len,
1062 					    &old_de_ino);
1063 	if (status) {
1064 		status = -ENOENT;
1065 		goto bail;
1066 	}
1067 
1068 	/*
1069 	 *  Check for inode number is _not_ due to possible IO errors.
1070 	 *  We might rmdir the source, keep it as pwd of some process
1071 	 *  and merrily kill the link to whatever was created under the
1072 	 *  same name. Goodbye sticky bit ;-<
1073 	 */
1074 	if (old_de_ino != OCFS2_I(old_inode)->ip_blkno) {
1075 		status = -ENOENT;
1076 		goto bail;
1077 	}
1078 
1079 	/* check if the target already exists (in which case we need
1080 	 * to delete it */
1081 	status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
1082 					  new_dentry->d_name.len,
1083 					  &newfe_blkno, new_dir, &new_de_bh,
1084 					  &new_de);
1085 	/* The only error we allow here is -ENOENT because the new
1086 	 * file not existing is perfectly valid. */
1087 	if ((status < 0) && (status != -ENOENT)) {
1088 		/* If we cannot find the file specified we should just */
1089 		/* return the error... */
1090 		mlog_errno(status);
1091 		goto bail;
1092 	}
1093 
1094 	if (!new_de && new_inode) {
1095 		/*
1096 		 * Target was unlinked by another node while we were
1097 		 * waiting to get to ocfs2_rename(). There isn't
1098 		 * anything we can do here to help the situation, so
1099 		 * bubble up the appropriate error.
1100 		 */
1101 		status = -ENOENT;
1102 		goto bail;
1103 	}
1104 
1105 	/* In case we need to overwrite an existing file, we blow it
1106 	 * away first */
1107 	if (new_de) {
1108 		/* VFS didn't think there existed an inode here, but
1109 		 * someone else in the cluster must have raced our
1110 		 * rename to create one. Today we error cleanly, in
1111 		 * the future we should consider calling iget to build
1112 		 * a new struct inode for this entry. */
1113 		if (!new_inode) {
1114 			status = -EACCES;
1115 
1116 			mlog(0, "We found an inode for name %.*s but VFS "
1117 			     "didn't give us one.\n", new_dentry->d_name.len,
1118 			     new_dentry->d_name.name);
1119 			goto bail;
1120 		}
1121 
1122 		if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
1123 			status = -EACCES;
1124 
1125 			mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n",
1126 			     (unsigned long long)OCFS2_I(new_inode)->ip_blkno,
1127 			     (unsigned long long)newfe_blkno,
1128 			     OCFS2_I(new_inode)->ip_flags);
1129 			goto bail;
1130 		}
1131 
1132 		status = ocfs2_inode_lock(new_inode, &newfe_bh, 1);
1133 		if (status < 0) {
1134 			if (status != -ENOENT)
1135 				mlog_errno(status);
1136 			goto bail;
1137 		}
1138 		new_child_locked = 1;
1139 
1140 		status = ocfs2_remote_dentry_delete(new_dentry);
1141 		if (status < 0) {
1142 			mlog_errno(status);
1143 			goto bail;
1144 		}
1145 
1146 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
1147 
1148 		mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
1149 		     "newfebh=%p bhblocknr=%llu\n", new_de,
1150 		     (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
1151 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
1152 
1153 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
1154 			status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
1155 							  new_inode,
1156 							  orphan_name,
1157 							  &orphan_entry_bh);
1158 			if (status < 0) {
1159 				mlog_errno(status);
1160 				goto bail;
1161 			}
1162 		}
1163 	} else {
1164 		BUG_ON(new_dentry->d_parent->d_inode != new_dir);
1165 
1166 		status = ocfs2_check_dir_for_entry(new_dir,
1167 						   new_dentry->d_name.name,
1168 						   new_dentry->d_name.len);
1169 		if (status)
1170 			goto bail;
1171 
1172 		status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
1173 						      new_dentry->d_name.name,
1174 						      new_dentry->d_name.len,
1175 						      &insert_entry_bh);
1176 		if (status < 0) {
1177 			mlog_errno(status);
1178 			goto bail;
1179 		}
1180 	}
1181 
1182 	handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS);
1183 	if (IS_ERR(handle)) {
1184 		status = PTR_ERR(handle);
1185 		handle = NULL;
1186 		mlog_errno(status);
1187 		goto bail;
1188 	}
1189 
1190 	if (new_de) {
1191 		if (S_ISDIR(new_inode->i_mode)) {
1192 			if (!ocfs2_empty_dir(new_inode) ||
1193 			    new_inode->i_nlink != 2) {
1194 				status = -ENOTEMPTY;
1195 				goto bail;
1196 			}
1197 		}
1198 		status = ocfs2_journal_access(handle, new_inode, newfe_bh,
1199 					      OCFS2_JOURNAL_ACCESS_WRITE);
1200 		if (status < 0) {
1201 			mlog_errno(status);
1202 			goto bail;
1203 		}
1204 
1205 		if (S_ISDIR(new_inode->i_mode) ||
1206 		    (newfe->i_links_count == cpu_to_le16(1))){
1207 			status = ocfs2_orphan_add(osb, handle, new_inode,
1208 						  newfe, orphan_name,
1209 						  orphan_entry_bh, orphan_dir);
1210 			if (status < 0) {
1211 				mlog_errno(status);
1212 				goto bail;
1213 			}
1214 		}
1215 
1216 		/* change the dirent to point to the correct inode */
1217 		status = ocfs2_update_entry(new_dir, handle, new_de_bh,
1218 					    new_de, old_inode);
1219 		if (status < 0) {
1220 			mlog_errno(status);
1221 			goto bail;
1222 		}
1223 		new_dir->i_version++;
1224 
1225 		if (S_ISDIR(new_inode->i_mode))
1226 			newfe->i_links_count = 0;
1227 		else
1228 			le16_add_cpu(&newfe->i_links_count, -1);
1229 
1230 		status = ocfs2_journal_dirty(handle, newfe_bh);
1231 		if (status < 0) {
1232 			mlog_errno(status);
1233 			goto bail;
1234 		}
1235 	} else {
1236 		/* if the name was not found in new_dir, add it now */
1237 		status = ocfs2_add_entry(handle, new_dentry, old_inode,
1238 					 OCFS2_I(old_inode)->ip_blkno,
1239 					 new_dir_bh, insert_entry_bh);
1240 	}
1241 
1242 	old_inode->i_ctime = CURRENT_TIME;
1243 	mark_inode_dirty(old_inode);
1244 
1245 	status = ocfs2_journal_access(handle, old_inode, old_inode_bh,
1246 				      OCFS2_JOURNAL_ACCESS_WRITE);
1247 	if (status >= 0) {
1248 		old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
1249 
1250 		old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
1251 		old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
1252 
1253 		status = ocfs2_journal_dirty(handle, old_inode_bh);
1254 		if (status < 0)
1255 			mlog_errno(status);
1256 	} else
1257 		mlog_errno(status);
1258 
1259 	/*
1260 	 * Now that the name has been added to new_dir, remove the old name.
1261 	 *
1262 	 * We don't keep any directory entry context around until now
1263 	 * because the insert might have changed the type of directory
1264 	 * we're dealing with.
1265 	 */
1266 	old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
1267 				     old_dentry->d_name.len,
1268 				     old_dir, &old_de);
1269 	if (!old_de_bh) {
1270 		status = -EIO;
1271 		goto bail;
1272 	}
1273 
1274 	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
1275 	if (status < 0) {
1276 		mlog_errno(status);
1277 		goto bail;
1278 	}
1279 
1280 	if (new_inode) {
1281 		new_inode->i_nlink--;
1282 		new_inode->i_ctime = CURRENT_TIME;
1283 	}
1284 	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1285 	if (old_inode_de_bh) {
1286 		status = ocfs2_update_entry(old_inode, handle, old_inode_de_bh,
1287 					    old_inode_dot_dot_de, new_dir);
1288 		old_dir->i_nlink--;
1289 		if (new_inode) {
1290 			new_inode->i_nlink--;
1291 		} else {
1292 			inc_nlink(new_dir);
1293 			mark_inode_dirty(new_dir);
1294 		}
1295 	}
1296 	mark_inode_dirty(old_dir);
1297 	ocfs2_mark_inode_dirty(handle, old_dir, old_dir_bh);
1298 	if (new_inode) {
1299 		mark_inode_dirty(new_inode);
1300 		ocfs2_mark_inode_dirty(handle, new_inode, newfe_bh);
1301 	}
1302 
1303 	if (old_dir != new_dir) {
1304 		/* Keep the same times on both directories.*/
1305 		new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime;
1306 
1307 		/*
1308 		 * This will also pick up the i_nlink change from the
1309 		 * block above.
1310 		 */
1311 		ocfs2_mark_inode_dirty(handle, new_dir, new_dir_bh);
1312 	}
1313 
1314 	if (old_dir_nlink != old_dir->i_nlink) {
1315 		if (!old_dir_bh) {
1316 			mlog(ML_ERROR, "need to change nlink for old dir "
1317 			     "%llu from %d to %d but bh is NULL!\n",
1318 			     (unsigned long long)OCFS2_I(old_dir)->ip_blkno,
1319 			     (int)old_dir_nlink, old_dir->i_nlink);
1320 		} else {
1321 			struct ocfs2_dinode *fe;
1322 			status = ocfs2_journal_access(handle, old_dir,
1323 						      old_dir_bh,
1324 						      OCFS2_JOURNAL_ACCESS_WRITE);
1325 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1326 			fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
1327 			status = ocfs2_journal_dirty(handle, old_dir_bh);
1328 		}
1329 	}
1330 
1331 	ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
1332 	status = 0;
1333 bail:
1334 	if (rename_lock)
1335 		ocfs2_rename_unlock(osb);
1336 
1337 	if (handle)
1338 		ocfs2_commit_trans(osb, handle);
1339 
1340 	if (parents_locked)
1341 		ocfs2_double_unlock(old_dir, new_dir);
1342 
1343 	if (old_child_locked)
1344 		ocfs2_inode_unlock(old_inode, 1);
1345 
1346 	if (new_child_locked)
1347 		ocfs2_inode_unlock(new_inode, 1);
1348 
1349 	if (orphan_dir) {
1350 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
1351 		ocfs2_inode_unlock(orphan_dir, 1);
1352 		mutex_unlock(&orphan_dir->i_mutex);
1353 		iput(orphan_dir);
1354 	}
1355 
1356 	if (new_inode)
1357 		sync_mapping_buffers(old_inode->i_mapping);
1358 
1359 	if (new_inode)
1360 		iput(new_inode);
1361 	brelse(newfe_bh);
1362 	brelse(old_inode_bh);
1363 	brelse(old_dir_bh);
1364 	brelse(new_dir_bh);
1365 	brelse(new_de_bh);
1366 	brelse(old_de_bh);
1367 	brelse(old_inode_de_bh);
1368 	brelse(orphan_entry_bh);
1369 	brelse(insert_entry_bh);
1370 
1371 	mlog_exit(status);
1372 
1373 	return status;
1374 }
1375 
1376 /*
1377  * we expect i_size = strlen(symname). Copy symname into the file
1378  * data, including the null terminator.
1379  */
1380 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1381 				     handle_t *handle,
1382 				     struct inode *inode,
1383 				     const char *symname)
1384 {
1385 	struct buffer_head **bhs = NULL;
1386 	const char *c;
1387 	struct super_block *sb = osb->sb;
1388 	u64 p_blkno, p_blocks;
1389 	int virtual, blocks, status, i, bytes_left;
1390 
1391 	bytes_left = i_size_read(inode) + 1;
1392 	/* we can't trust i_blocks because we're actually going to
1393 	 * write i_size + 1 bytes. */
1394 	blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
1395 
1396 	mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n",
1397 			(unsigned long long)inode->i_blocks,
1398 			i_size_read(inode), blocks);
1399 
1400 	/* Sanity check -- make sure we're going to fit. */
1401 	if (bytes_left >
1402 	    ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) {
1403 		status = -EIO;
1404 		mlog_errno(status);
1405 		goto bail;
1406 	}
1407 
1408 	bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL);
1409 	if (!bhs) {
1410 		status = -ENOMEM;
1411 		mlog_errno(status);
1412 		goto bail;
1413 	}
1414 
1415 	status = ocfs2_extent_map_get_blocks(inode, 0, &p_blkno, &p_blocks,
1416 					     NULL);
1417 	if (status < 0) {
1418 		mlog_errno(status);
1419 		goto bail;
1420 	}
1421 
1422 	/* links can never be larger than one cluster so we know this
1423 	 * is all going to be contiguous, but do a sanity check
1424 	 * anyway. */
1425 	if ((p_blocks << sb->s_blocksize_bits) < bytes_left) {
1426 		status = -EIO;
1427 		mlog_errno(status);
1428 		goto bail;
1429 	}
1430 
1431 	virtual = 0;
1432 	while(bytes_left > 0) {
1433 		c = &symname[virtual * sb->s_blocksize];
1434 
1435 		bhs[virtual] = sb_getblk(sb, p_blkno);
1436 		if (!bhs[virtual]) {
1437 			status = -ENOMEM;
1438 			mlog_errno(status);
1439 			goto bail;
1440 		}
1441 		ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]);
1442 
1443 		status = ocfs2_journal_access(handle, inode, bhs[virtual],
1444 					      OCFS2_JOURNAL_ACCESS_CREATE);
1445 		if (status < 0) {
1446 			mlog_errno(status);
1447 			goto bail;
1448 		}
1449 
1450 		memset(bhs[virtual]->b_data, 0, sb->s_blocksize);
1451 
1452 		memcpy(bhs[virtual]->b_data, c,
1453 		       (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
1454 		       bytes_left);
1455 
1456 		status = ocfs2_journal_dirty(handle, bhs[virtual]);
1457 		if (status < 0) {
1458 			mlog_errno(status);
1459 			goto bail;
1460 		}
1461 
1462 		virtual++;
1463 		p_blkno++;
1464 		bytes_left -= sb->s_blocksize;
1465 	}
1466 
1467 	status = 0;
1468 bail:
1469 
1470 	if (bhs) {
1471 		for(i = 0; i < blocks; i++)
1472 			brelse(bhs[i]);
1473 		kfree(bhs);
1474 	}
1475 
1476 	mlog_exit(status);
1477 	return status;
1478 }
1479 
1480 static int ocfs2_symlink(struct inode *dir,
1481 			 struct dentry *dentry,
1482 			 const char *symname)
1483 {
1484 	int status, l, credits;
1485 	u64 newsize;
1486 	struct ocfs2_super *osb = NULL;
1487 	struct inode *inode = NULL;
1488 	struct super_block *sb;
1489 	struct buffer_head *new_fe_bh = NULL;
1490 	struct buffer_head *de_bh = NULL;
1491 	struct buffer_head *parent_fe_bh = NULL;
1492 	struct ocfs2_dinode *fe = NULL;
1493 	struct ocfs2_dinode *dirfe;
1494 	handle_t *handle = NULL;
1495 	struct ocfs2_alloc_context *inode_ac = NULL;
1496 	struct ocfs2_alloc_context *data_ac = NULL;
1497 
1498 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1499 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
1500 
1501 	sb = dir->i_sb;
1502 	osb = OCFS2_SB(sb);
1503 
1504 	l = strlen(symname) + 1;
1505 
1506 	credits = ocfs2_calc_symlink_credits(sb);
1507 
1508 	/* lock the parent directory */
1509 	status = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
1510 	if (status < 0) {
1511 		if (status != -ENOENT)
1512 			mlog_errno(status);
1513 		return status;
1514 	}
1515 
1516 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
1517 	if (!dirfe->i_links_count) {
1518 		/* can't make a file in a deleted directory. */
1519 		status = -ENOENT;
1520 		goto bail;
1521 	}
1522 
1523 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
1524 					   dentry->d_name.len);
1525 	if (status)
1526 		goto bail;
1527 
1528 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
1529 					      dentry->d_name.name,
1530 					      dentry->d_name.len, &de_bh);
1531 	if (status < 0) {
1532 		mlog_errno(status);
1533 		goto bail;
1534 	}
1535 
1536 	status = ocfs2_reserve_new_inode(osb, &inode_ac);
1537 	if (status < 0) {
1538 		if (status != -ENOSPC)
1539 			mlog_errno(status);
1540 		goto bail;
1541 	}
1542 
1543 	/* don't reserve bitmap space for fast symlinks. */
1544 	if (l > ocfs2_fast_symlink_chars(sb)) {
1545 		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
1546 		if (status < 0) {
1547 			if (status != -ENOSPC)
1548 				mlog_errno(status);
1549 			goto bail;
1550 		}
1551 	}
1552 
1553 	handle = ocfs2_start_trans(osb, credits);
1554 	if (IS_ERR(handle)) {
1555 		status = PTR_ERR(handle);
1556 		handle = NULL;
1557 		mlog_errno(status);
1558 		goto bail;
1559 	}
1560 
1561 	status = ocfs2_mknod_locked(osb, dir, dentry,
1562 				    S_IFLNK | S_IRWXUGO, 0,
1563 				    &new_fe_bh, parent_fe_bh, handle,
1564 				    &inode, inode_ac);
1565 	if (status < 0) {
1566 		mlog_errno(status);
1567 		goto bail;
1568 	}
1569 
1570 	fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
1571 	inode->i_rdev = 0;
1572 	newsize = l - 1;
1573 	if (l > ocfs2_fast_symlink_chars(sb)) {
1574 		u32 offset = 0;
1575 
1576 		inode->i_op = &ocfs2_symlink_inode_operations;
1577 		status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
1578 					      new_fe_bh,
1579 					      handle, data_ac, NULL,
1580 					      NULL);
1581 		if (status < 0) {
1582 			if (status != -ENOSPC && status != -EINTR) {
1583 				mlog(ML_ERROR,
1584 				     "Failed to extend file to %llu\n",
1585 				     (unsigned long long)newsize);
1586 				mlog_errno(status);
1587 				status = -ENOSPC;
1588 			}
1589 			goto bail;
1590 		}
1591 		i_size_write(inode, newsize);
1592 		inode->i_blocks = ocfs2_inode_sector_count(inode);
1593 	} else {
1594 		inode->i_op = &ocfs2_fast_symlink_inode_operations;
1595 		memcpy((char *) fe->id2.i_symlink, symname, l);
1596 		i_size_write(inode, newsize);
1597 		inode->i_blocks = 0;
1598 	}
1599 
1600 	status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh);
1601 	if (status < 0) {
1602 		mlog_errno(status);
1603 		goto bail;
1604 	}
1605 
1606 	if (!ocfs2_inode_is_fast_symlink(inode)) {
1607 		status = ocfs2_create_symlink_data(osb, handle, inode,
1608 						   symname);
1609 		if (status < 0) {
1610 			mlog_errno(status);
1611 			goto bail;
1612 		}
1613 	}
1614 
1615 	status = ocfs2_add_entry(handle, dentry, inode,
1616 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1617 				 de_bh);
1618 	if (status < 0) {
1619 		mlog_errno(status);
1620 		goto bail;
1621 	}
1622 
1623 	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1624 	if (status) {
1625 		mlog_errno(status);
1626 		goto bail;
1627 	}
1628 
1629 	insert_inode_hash(inode);
1630 	dentry->d_op = &ocfs2_dentry_ops;
1631 	d_instantiate(dentry, inode);
1632 bail:
1633 	if (handle)
1634 		ocfs2_commit_trans(osb, handle);
1635 
1636 	ocfs2_inode_unlock(dir, 1);
1637 
1638 	brelse(new_fe_bh);
1639 	brelse(parent_fe_bh);
1640 	brelse(de_bh);
1641 	if (inode_ac)
1642 		ocfs2_free_alloc_context(inode_ac);
1643 	if (data_ac)
1644 		ocfs2_free_alloc_context(data_ac);
1645 	if ((status < 0) && inode)
1646 		iput(inode);
1647 
1648 	mlog_exit(status);
1649 
1650 	return status;
1651 }
1652 
1653 static int ocfs2_blkno_stringify(u64 blkno, char *name)
1654 {
1655 	int status, namelen;
1656 
1657 	mlog_entry_void();
1658 
1659 	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
1660 			   (long long)blkno);
1661 	if (namelen <= 0) {
1662 		if (namelen)
1663 			status = namelen;
1664 		else
1665 			status = -EINVAL;
1666 		mlog_errno(status);
1667 		goto bail;
1668 	}
1669 	if (namelen != OCFS2_ORPHAN_NAMELEN) {
1670 		status = -EINVAL;
1671 		mlog_errno(status);
1672 		goto bail;
1673 	}
1674 
1675 	mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name,
1676 	     namelen);
1677 
1678 	status = 0;
1679 bail:
1680 	mlog_exit(status);
1681 	return status;
1682 }
1683 
1684 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
1685 				    struct inode **ret_orphan_dir,
1686 				    struct inode *inode,
1687 				    char *name,
1688 				    struct buffer_head **de_bh)
1689 {
1690 	struct inode *orphan_dir_inode;
1691 	struct buffer_head *orphan_dir_bh = NULL;
1692 	int status = 0;
1693 
1694 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
1695 	if (status < 0) {
1696 		mlog_errno(status);
1697 		return status;
1698 	}
1699 
1700 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
1701 						       ORPHAN_DIR_SYSTEM_INODE,
1702 						       osb->slot_num);
1703 	if (!orphan_dir_inode) {
1704 		status = -ENOENT;
1705 		mlog_errno(status);
1706 		return status;
1707 	}
1708 
1709 	mutex_lock(&orphan_dir_inode->i_mutex);
1710 
1711 	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
1712 	if (status < 0) {
1713 		mlog_errno(status);
1714 		goto leave;
1715 	}
1716 
1717 	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
1718 					      orphan_dir_bh, name,
1719 					      OCFS2_ORPHAN_NAMELEN, de_bh);
1720 	if (status < 0) {
1721 		ocfs2_inode_unlock(orphan_dir_inode, 1);
1722 
1723 		mlog_errno(status);
1724 		goto leave;
1725 	}
1726 
1727 	*ret_orphan_dir = orphan_dir_inode;
1728 
1729 leave:
1730 	if (status) {
1731 		mutex_unlock(&orphan_dir_inode->i_mutex);
1732 		iput(orphan_dir_inode);
1733 	}
1734 
1735 	brelse(orphan_dir_bh);
1736 
1737 	mlog_exit(status);
1738 	return status;
1739 }
1740 
1741 static int ocfs2_orphan_add(struct ocfs2_super *osb,
1742 			    handle_t *handle,
1743 			    struct inode *inode,
1744 			    struct ocfs2_dinode *fe,
1745 			    char *name,
1746 			    struct buffer_head *de_bh,
1747 			    struct inode *orphan_dir_inode)
1748 {
1749 	struct buffer_head *orphan_dir_bh = NULL;
1750 	int status = 0;
1751 	struct ocfs2_dinode *orphan_fe;
1752 
1753 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
1754 
1755 	status = ocfs2_read_block(orphan_dir_inode,
1756 				  OCFS2_I(orphan_dir_inode)->ip_blkno,
1757 				  &orphan_dir_bh);
1758 	if (status < 0) {
1759 		mlog_errno(status);
1760 		goto leave;
1761 	}
1762 
1763 	status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
1764 				      OCFS2_JOURNAL_ACCESS_WRITE);
1765 	if (status < 0) {
1766 		mlog_errno(status);
1767 		goto leave;
1768 	}
1769 
1770 	/* we're a cluster, and nlink can change on disk from
1771 	 * underneath us... */
1772 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
1773 	if (S_ISDIR(inode->i_mode))
1774 		le16_add_cpu(&orphan_fe->i_links_count, 1);
1775 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
1776 
1777 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
1778 	if (status < 0) {
1779 		mlog_errno(status);
1780 		goto leave;
1781 	}
1782 
1783 	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
1784 				   OCFS2_ORPHAN_NAMELEN, inode,
1785 				   OCFS2_I(inode)->ip_blkno,
1786 				   orphan_dir_bh, de_bh);
1787 	if (status < 0) {
1788 		mlog_errno(status);
1789 		goto leave;
1790 	}
1791 
1792 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
1793 
1794 	/* Record which orphan dir our inode now resides
1795 	 * in. delete_inode will use this to determine which orphan
1796 	 * dir to lock. */
1797 	fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
1798 
1799 	mlog(0, "Inode %llu orphaned in slot %d\n",
1800 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
1801 
1802 leave:
1803 	brelse(orphan_dir_bh);
1804 
1805 	mlog_exit(status);
1806 	return status;
1807 }
1808 
1809 /* unlike orphan_add, we expect the orphan dir to already be locked here. */
1810 int ocfs2_orphan_del(struct ocfs2_super *osb,
1811 		     handle_t *handle,
1812 		     struct inode *orphan_dir_inode,
1813 		     struct inode *inode,
1814 		     struct buffer_head *orphan_dir_bh)
1815 {
1816 	char name[OCFS2_ORPHAN_NAMELEN + 1];
1817 	struct ocfs2_dinode *orphan_fe;
1818 	int status = 0;
1819 	struct buffer_head *target_de_bh = NULL;
1820 	struct ocfs2_dir_entry *target_de = NULL;
1821 
1822 	mlog_entry_void();
1823 
1824 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
1825 	if (status < 0) {
1826 		mlog_errno(status);
1827 		goto leave;
1828 	}
1829 
1830 	mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n",
1831 	     name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
1832 	     OCFS2_ORPHAN_NAMELEN);
1833 
1834 	/* find it's spot in the orphan directory */
1835 	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
1836 					orphan_dir_inode, &target_de);
1837 	if (!target_de_bh) {
1838 		status = -ENOENT;
1839 		mlog_errno(status);
1840 		goto leave;
1841 	}
1842 
1843 	/* remove it from the orphan directory */
1844 	status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de,
1845 				    target_de_bh);
1846 	if (status < 0) {
1847 		mlog_errno(status);
1848 		goto leave;
1849 	}
1850 
1851 	status = ocfs2_journal_access(handle,orphan_dir_inode,  orphan_dir_bh,
1852 				      OCFS2_JOURNAL_ACCESS_WRITE);
1853 	if (status < 0) {
1854 		mlog_errno(status);
1855 		goto leave;
1856 	}
1857 
1858 	/* do the i_nlink dance! :) */
1859 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
1860 	if (S_ISDIR(inode->i_mode))
1861 		le16_add_cpu(&orphan_fe->i_links_count, -1);
1862 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
1863 
1864 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
1865 	if (status < 0) {
1866 		mlog_errno(status);
1867 		goto leave;
1868 	}
1869 
1870 leave:
1871 	brelse(target_de_bh);
1872 
1873 	mlog_exit(status);
1874 	return status;
1875 }
1876 
1877 const struct inode_operations ocfs2_dir_iops = {
1878 	.create		= ocfs2_create,
1879 	.lookup		= ocfs2_lookup,
1880 	.link		= ocfs2_link,
1881 	.unlink		= ocfs2_unlink,
1882 	.rmdir		= ocfs2_unlink,
1883 	.symlink	= ocfs2_symlink,
1884 	.mkdir		= ocfs2_mkdir,
1885 	.mknod		= ocfs2_mknod,
1886 	.rename		= ocfs2_rename,
1887 	.setattr	= ocfs2_setattr,
1888 	.getattr	= ocfs2_getattr,
1889 	.permission	= ocfs2_permission,
1890 	.setxattr	= generic_setxattr,
1891 	.getxattr	= generic_getxattr,
1892 	.listxattr	= ocfs2_listxattr,
1893 	.removexattr	= generic_removexattr,
1894 };
1895