xref: /linux/fs/gfs2/glops.c (revision ff5599816711d2e67da2d7561fd36ac48debd433)
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9 
10 #include <linux/spinlock.h>
11 #include <linux/completion.h>
12 #include <linux/buffer_head.h>
13 #include <linux/gfs2_ondisk.h>
14 #include <linux/bio.h>
15 #include <linux/posix_acl.h>
16 
17 #include "gfs2.h"
18 #include "incore.h"
19 #include "bmap.h"
20 #include "glock.h"
21 #include "glops.h"
22 #include "inode.h"
23 #include "log.h"
24 #include "meta_io.h"
25 #include "recovery.h"
26 #include "rgrp.h"
27 #include "util.h"
28 #include "trans.h"
29 #include "dir.h"
30 
31 static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
32 {
33 	fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
34 	       bh, (unsigned long long)bh->b_blocknr, bh->b_state,
35 	       bh->b_page->mapping, bh->b_page->flags);
36 	fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n",
37 	       gl->gl_name.ln_type, gl->gl_name.ln_number,
38 	       gfs2_glock2aspace(gl));
39 	gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n");
40 }
41 
42 /**
43  * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
44  * @gl: the glock
45  * @fsync: set when called from fsync (not all buffers will be clean)
46  *
47  * None of the buffers should be dirty, locked, or pinned.
48  */
49 
50 static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
51 {
52 	struct gfs2_sbd *sdp = gl->gl_sbd;
53 	struct list_head *head = &gl->gl_ail_list;
54 	struct gfs2_bufdata *bd, *tmp;
55 	struct buffer_head *bh;
56 	const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock);
57 
58 	gfs2_log_lock(sdp);
59 	spin_lock(&sdp->sd_ail_lock);
60 	list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
61 		bh = bd->bd_bh;
62 		if (bh->b_state & b_state) {
63 			if (fsync)
64 				continue;
65 			gfs2_ail_error(gl, bh);
66 		}
67 		gfs2_trans_add_revoke(sdp, bd);
68 	}
69 	GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
70 	spin_unlock(&sdp->sd_ail_lock);
71 	gfs2_log_unlock(sdp);
72 }
73 
74 
75 static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
76 {
77 	struct gfs2_sbd *sdp = gl->gl_sbd;
78 	struct gfs2_trans tr;
79 
80 	memset(&tr, 0, sizeof(tr));
81 	tr.tr_revokes = atomic_read(&gl->gl_ail_count);
82 
83 	if (!tr.tr_revokes)
84 		return;
85 
86 	/* A shortened, inline version of gfs2_trans_begin() */
87 	tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
88 	tr.tr_ip = (unsigned long)__builtin_return_address(0);
89 	sb_start_intwrite(sdp->sd_vfs);
90 	gfs2_log_reserve(sdp, tr.tr_reserved);
91 	WARN_ON_ONCE(current->journal_info);
92 	current->journal_info = &tr;
93 
94 	__gfs2_ail_flush(gl, 0);
95 
96 	gfs2_trans_end(sdp);
97 	gfs2_log_flush(sdp, NULL);
98 }
99 
100 void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
101 {
102 	struct gfs2_sbd *sdp = gl->gl_sbd;
103 	unsigned int revokes = atomic_read(&gl->gl_ail_count);
104 	int ret;
105 
106 	if (!revokes)
107 		return;
108 
109 	ret = gfs2_trans_begin(sdp, 0, revokes);
110 	if (ret)
111 		return;
112 	__gfs2_ail_flush(gl, fsync);
113 	gfs2_trans_end(sdp);
114 	gfs2_log_flush(sdp, NULL);
115 }
116 
117 /**
118  * rgrp_go_sync - sync out the metadata for this glock
119  * @gl: the glock
120  *
121  * Called when demoting or unlocking an EX glock.  We must flush
122  * to disk all dirty buffers/pages relating to this glock, and must not
123  * not return to caller to demote/unlock the glock until I/O is complete.
124  */
125 
126 static void rgrp_go_sync(struct gfs2_glock *gl)
127 {
128 	struct address_space *metamapping = gfs2_glock2aspace(gl);
129 	struct gfs2_rgrpd *rgd;
130 	int error;
131 
132 	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
133 		return;
134 	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
135 
136 	gfs2_log_flush(gl->gl_sbd, gl);
137 	filemap_fdatawrite(metamapping);
138 	error = filemap_fdatawait(metamapping);
139         mapping_set_error(metamapping, error);
140 	gfs2_ail_empty_gl(gl);
141 
142 	spin_lock(&gl->gl_spin);
143 	rgd = gl->gl_object;
144 	if (rgd)
145 		gfs2_free_clones(rgd);
146 	spin_unlock(&gl->gl_spin);
147 }
148 
149 /**
150  * rgrp_go_inval - invalidate the metadata for this glock
151  * @gl: the glock
152  * @flags:
153  *
154  * We never used LM_ST_DEFERRED with resource groups, so that we
155  * should always see the metadata flag set here.
156  *
157  */
158 
159 static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
160 {
161 	struct address_space *mapping = gfs2_glock2aspace(gl);
162 
163 	WARN_ON_ONCE(!(flags & DIO_METADATA));
164 	gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
165 	truncate_inode_pages(mapping, 0);
166 
167 	if (gl->gl_object) {
168 		struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
169 		rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
170 	}
171 }
172 
173 /**
174  * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
175  * @gl: the glock protecting the inode
176  *
177  */
178 
179 static void inode_go_sync(struct gfs2_glock *gl)
180 {
181 	struct gfs2_inode *ip = gl->gl_object;
182 	struct address_space *metamapping = gfs2_glock2aspace(gl);
183 	int error;
184 
185 	if (ip && !S_ISREG(ip->i_inode.i_mode))
186 		ip = NULL;
187 	if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
188 		unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
189 	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
190 		return;
191 
192 	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
193 
194 	gfs2_log_flush(gl->gl_sbd, gl);
195 	filemap_fdatawrite(metamapping);
196 	if (ip) {
197 		struct address_space *mapping = ip->i_inode.i_mapping;
198 		filemap_fdatawrite(mapping);
199 		error = filemap_fdatawait(mapping);
200 		mapping_set_error(mapping, error);
201 	}
202 	error = filemap_fdatawait(metamapping);
203 	mapping_set_error(metamapping, error);
204 	gfs2_ail_empty_gl(gl);
205 	/*
206 	 * Writeback of the data mapping may cause the dirty flag to be set
207 	 * so we have to clear it again here.
208 	 */
209 	smp_mb__before_clear_bit();
210 	clear_bit(GLF_DIRTY, &gl->gl_flags);
211 }
212 
213 /**
214  * inode_go_inval - prepare a inode glock to be released
215  * @gl: the glock
216  * @flags:
217  *
218  * Normally we invlidate everything, but if we are moving into
219  * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
220  * can keep hold of the metadata, since it won't have changed.
221  *
222  */
223 
224 static void inode_go_inval(struct gfs2_glock *gl, int flags)
225 {
226 	struct gfs2_inode *ip = gl->gl_object;
227 
228 	gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
229 
230 	if (flags & DIO_METADATA) {
231 		struct address_space *mapping = gfs2_glock2aspace(gl);
232 		truncate_inode_pages(mapping, 0);
233 		if (ip) {
234 			set_bit(GIF_INVALID, &ip->i_flags);
235 			forget_all_cached_acls(&ip->i_inode);
236 			gfs2_dir_hash_inval(ip);
237 		}
238 	}
239 
240 	if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) {
241 		gfs2_log_flush(gl->gl_sbd, NULL);
242 		gl->gl_sbd->sd_rindex_uptodate = 0;
243 	}
244 	if (ip && S_ISREG(ip->i_inode.i_mode))
245 		truncate_inode_pages(ip->i_inode.i_mapping, 0);
246 }
247 
248 /**
249  * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
250  * @gl: the glock
251  *
252  * Returns: 1 if it's ok
253  */
254 
255 static int inode_go_demote_ok(const struct gfs2_glock *gl)
256 {
257 	struct gfs2_sbd *sdp = gl->gl_sbd;
258 	struct gfs2_holder *gh;
259 
260 	if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
261 		return 0;
262 
263 	if (!list_empty(&gl->gl_holders)) {
264 		gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
265 		if (gh->gh_list.next != &gl->gl_holders)
266 			return 0;
267 	}
268 
269 	return 1;
270 }
271 
272 /**
273  * gfs2_set_nlink - Set the inode's link count based on on-disk info
274  * @inode: The inode in question
275  * @nlink: The link count
276  *
277  * If the link count has hit zero, it must never be raised, whatever the
278  * on-disk inode might say. When new struct inodes are created the link
279  * count is set to 1, so that we can safely use this test even when reading
280  * in on disk information for the first time.
281  */
282 
283 static void gfs2_set_nlink(struct inode *inode, u32 nlink)
284 {
285 	/*
286 	 * We will need to review setting the nlink count here in the
287 	 * light of the forthcoming ro bind mount work. This is a reminder
288 	 * to do that.
289 	 */
290 	if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
291 		if (nlink == 0)
292 			clear_nlink(inode);
293 		else
294 			set_nlink(inode, nlink);
295 	}
296 }
297 
298 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
299 {
300 	const struct gfs2_dinode *str = buf;
301 	struct timespec atime;
302 	u16 height, depth;
303 
304 	if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
305 		goto corrupt;
306 	ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
307 	ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
308 	ip->i_inode.i_rdev = 0;
309 	switch (ip->i_inode.i_mode & S_IFMT) {
310 	case S_IFBLK:
311 	case S_IFCHR:
312 		ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
313 					   be32_to_cpu(str->di_minor));
314 		break;
315 	};
316 
317 	i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
318 	i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
319 	gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
320 	i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
321 	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
322 	atime.tv_sec = be64_to_cpu(str->di_atime);
323 	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
324 	if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
325 		ip->i_inode.i_atime = atime;
326 	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
327 	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
328 	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
329 	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
330 
331 	ip->i_goal = be64_to_cpu(str->di_goal_meta);
332 	ip->i_generation = be64_to_cpu(str->di_generation);
333 
334 	ip->i_diskflags = be32_to_cpu(str->di_flags);
335 	ip->i_eattr = be64_to_cpu(str->di_eattr);
336 	/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
337 	gfs2_set_inode_flags(&ip->i_inode);
338 	height = be16_to_cpu(str->di_height);
339 	if (unlikely(height > GFS2_MAX_META_HEIGHT))
340 		goto corrupt;
341 	ip->i_height = (u8)height;
342 
343 	depth = be16_to_cpu(str->di_depth);
344 	if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
345 		goto corrupt;
346 	ip->i_depth = (u8)depth;
347 	ip->i_entries = be32_to_cpu(str->di_entries);
348 
349 	if (S_ISREG(ip->i_inode.i_mode))
350 		gfs2_set_aops(&ip->i_inode);
351 
352 	return 0;
353 corrupt:
354 	gfs2_consist_inode(ip);
355 	return -EIO;
356 }
357 
358 /**
359  * gfs2_inode_refresh - Refresh the incore copy of the dinode
360  * @ip: The GFS2 inode
361  *
362  * Returns: errno
363  */
364 
365 int gfs2_inode_refresh(struct gfs2_inode *ip)
366 {
367 	struct buffer_head *dibh;
368 	int error;
369 
370 	error = gfs2_meta_inode_buffer(ip, &dibh);
371 	if (error)
372 		return error;
373 
374 	error = gfs2_dinode_in(ip, dibh->b_data);
375 	brelse(dibh);
376 	clear_bit(GIF_INVALID, &ip->i_flags);
377 
378 	return error;
379 }
380 
381 /**
382  * inode_go_lock - operation done after an inode lock is locked by a process
383  * @gl: the glock
384  * @flags:
385  *
386  * Returns: errno
387  */
388 
389 static int inode_go_lock(struct gfs2_holder *gh)
390 {
391 	struct gfs2_glock *gl = gh->gh_gl;
392 	struct gfs2_sbd *sdp = gl->gl_sbd;
393 	struct gfs2_inode *ip = gl->gl_object;
394 	int error = 0;
395 
396 	if (!ip || (gh->gh_flags & GL_SKIP))
397 		return 0;
398 
399 	if (test_bit(GIF_INVALID, &ip->i_flags)) {
400 		error = gfs2_inode_refresh(ip);
401 		if (error)
402 			return error;
403 	}
404 
405 	if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
406 	    (gl->gl_state == LM_ST_EXCLUSIVE) &&
407 	    (gh->gh_state == LM_ST_EXCLUSIVE)) {
408 		spin_lock(&sdp->sd_trunc_lock);
409 		if (list_empty(&ip->i_trunc_list))
410 			list_add(&sdp->sd_trunc_list, &ip->i_trunc_list);
411 		spin_unlock(&sdp->sd_trunc_lock);
412 		wake_up(&sdp->sd_quota_wait);
413 		return 1;
414 	}
415 
416 	return error;
417 }
418 
419 /**
420  * inode_go_dump - print information about an inode
421  * @seq: The iterator
422  * @ip: the inode
423  *
424  * Returns: 0 on success, -ENOBUFS when we run out of space
425  */
426 
427 static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
428 {
429 	const struct gfs2_inode *ip = gl->gl_object;
430 	if (ip == NULL)
431 		return 0;
432 	gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n",
433 		  (unsigned long long)ip->i_no_formal_ino,
434 		  (unsigned long long)ip->i_no_addr,
435 		  IF2DT(ip->i_inode.i_mode), ip->i_flags,
436 		  (unsigned int)ip->i_diskflags,
437 		  (unsigned long long)i_size_read(&ip->i_inode));
438 	return 0;
439 }
440 
441 /**
442  * trans_go_sync - promote/demote the transaction glock
443  * @gl: the glock
444  * @state: the requested state
445  * @flags:
446  *
447  */
448 
449 static void trans_go_sync(struct gfs2_glock *gl)
450 {
451 	struct gfs2_sbd *sdp = gl->gl_sbd;
452 
453 	if (gl->gl_state != LM_ST_UNLOCKED &&
454 	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
455 		gfs2_meta_syncfs(sdp);
456 		gfs2_log_shutdown(sdp);
457 	}
458 }
459 
460 /**
461  * trans_go_xmote_bh - After promoting/demoting the transaction glock
462  * @gl: the glock
463  *
464  */
465 
466 static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
467 {
468 	struct gfs2_sbd *sdp = gl->gl_sbd;
469 	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
470 	struct gfs2_glock *j_gl = ip->i_gl;
471 	struct gfs2_log_header_host head;
472 	int error;
473 
474 	if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
475 		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
476 
477 		error = gfs2_find_jhead(sdp->sd_jdesc, &head);
478 		if (error)
479 			gfs2_consist(sdp);
480 		if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
481 			gfs2_consist(sdp);
482 
483 		/*  Initialize some head of the log stuff  */
484 		if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
485 			sdp->sd_log_sequence = head.lh_sequence + 1;
486 			gfs2_log_pointers_init(sdp, head.lh_blkno);
487 		}
488 	}
489 	return 0;
490 }
491 
/**
 * trans_go_demote_ok - demotion check for the transaction glock
 * @gl: the glock (not examined)
 *
 * Returns: 0 unconditionally
 */

static int trans_go_demote_ok(const struct gfs2_glock *gl)
{
	return 0;
}
503 
504 /**
505  * iopen_go_callback - schedule the dcache entry for the inode to be deleted
506  * @gl: the glock
507  *
508  * gl_spin lock is held while calling this
509  */
510 static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
511 {
512 	struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
513 	struct gfs2_sbd *sdp = gl->gl_sbd;
514 
515 	if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY))
516 		return;
517 
518 	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
519 	    gl->gl_state == LM_ST_SHARED && ip) {
520 		gfs2_glock_hold(gl);
521 		if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
522 			gfs2_glock_put_nolock(gl);
523 	}
524 }
525 
526 const struct gfs2_glock_operations gfs2_meta_glops = {
527 	.go_type = LM_TYPE_META,
528 };
529 
530 const struct gfs2_glock_operations gfs2_inode_glops = {
531 	.go_sync = inode_go_sync,
532 	.go_inval = inode_go_inval,
533 	.go_demote_ok = inode_go_demote_ok,
534 	.go_lock = inode_go_lock,
535 	.go_dump = inode_go_dump,
536 	.go_type = LM_TYPE_INODE,
537 	.go_flags = GLOF_ASPACE,
538 };
539 
540 const struct gfs2_glock_operations gfs2_rgrp_glops = {
541 	.go_sync = rgrp_go_sync,
542 	.go_inval = rgrp_go_inval,
543 	.go_lock = gfs2_rgrp_go_lock,
544 	.go_unlock = gfs2_rgrp_go_unlock,
545 	.go_dump = gfs2_rgrp_dump,
546 	.go_type = LM_TYPE_RGRP,
547 	.go_flags = GLOF_ASPACE | GLOF_LVB,
548 };
549 
550 const struct gfs2_glock_operations gfs2_trans_glops = {
551 	.go_sync = trans_go_sync,
552 	.go_xmote_bh = trans_go_xmote_bh,
553 	.go_demote_ok = trans_go_demote_ok,
554 	.go_type = LM_TYPE_NONDISK,
555 };
556 
557 const struct gfs2_glock_operations gfs2_iopen_glops = {
558 	.go_type = LM_TYPE_IOPEN,
559 	.go_callback = iopen_go_callback,
560 };
561 
562 const struct gfs2_glock_operations gfs2_flock_glops = {
563 	.go_type = LM_TYPE_FLOCK,
564 };
565 
566 const struct gfs2_glock_operations gfs2_nondisk_glops = {
567 	.go_type = LM_TYPE_NONDISK,
568 };
569 
570 const struct gfs2_glock_operations gfs2_quota_glops = {
571 	.go_type = LM_TYPE_QUOTA,
572 	.go_flags = GLOF_LVB,
573 };
574 
575 const struct gfs2_glock_operations gfs2_journal_glops = {
576 	.go_type = LM_TYPE_JOURNAL,
577 };
578 
579 const struct gfs2_glock_operations *gfs2_glops_list[] = {
580 	[LM_TYPE_META] = &gfs2_meta_glops,
581 	[LM_TYPE_INODE] = &gfs2_inode_glops,
582 	[LM_TYPE_RGRP] = &gfs2_rgrp_glops,
583 	[LM_TYPE_IOPEN] = &gfs2_iopen_glops,
584 	[LM_TYPE_FLOCK] = &gfs2_flock_glops,
585 	[LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
586 	[LM_TYPE_QUOTA] = &gfs2_quota_glops,
587 	[LM_TYPE_JOURNAL] = &gfs2_journal_glops,
588 };
589 
590