xref: /linux/fs/ocfs2/resize.c (revision 9cfc5c90ad38c8fc11bfd39de42a107da00871ba)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * resize.c
5  *
6  * volume resize.
7  * Inspired by ext3/resize.c.
8  *
9  * Copyright (C) 2007 Oracle.  All rights reserved.
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public
13  * License as published by the Free Software Foundation; either
14  * version 2 of the License, or (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public
22  * License along with this program; if not, write to the
23  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24  * Boston, MA 02111-1307, USA.
25  */
26 
27 #include <linux/fs.h>
28 #include <linux/types.h>
29 
30 #include <cluster/masklog.h>
31 
32 #include "ocfs2.h"
33 
34 #include "alloc.h"
35 #include "dlmglue.h"
36 #include "inode.h"
37 #include "journal.h"
38 #include "super.h"
39 #include "sysfile.h"
40 #include "uptodate.h"
41 #include "ocfs2_trace.h"
42 
43 #include "buffer_head_io.h"
44 #include "suballoc.h"
45 #include "resize.h"
46 
47 /*
48  * Check whether there are new backup superblocks exist
49  * in the last group. If there are some, mark them or clear
50  * them in the bitmap.
51  *
52  * Return how many backups we find in the last group.
53  */
54 static u16 ocfs2_calc_new_backup_super(struct inode *inode,
55 				       struct ocfs2_group_desc *gd,
56 				       u16 cl_cpg,
57 				       int set)
58 {
59 	int i;
60 	u16 backups = 0;
61 	u32 cluster;
62 	u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno);
63 
64 	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
65 		blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
66 		cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
67 
68 		gd_blkno = ocfs2_which_cluster_group(inode, cluster);
69 		if (gd_blkno < lgd_blkno)
70 			continue;
71 		else if (gd_blkno > lgd_blkno)
72 			break;
73 
74 		if (set)
75 			ocfs2_set_bit(cluster % cl_cpg,
76 				      (unsigned long *)gd->bg_bitmap);
77 		else
78 			ocfs2_clear_bit(cluster % cl_cpg,
79 					(unsigned long *)gd->bg_bitmap);
80 		backups++;
81 	}
82 
83 	return backups;
84 }
85 
86 static int ocfs2_update_last_group_and_inode(handle_t *handle,
87 					     struct inode *bm_inode,
88 					     struct buffer_head *bm_bh,
89 					     struct buffer_head *group_bh,
90 					     u32 first_new_cluster,
91 					     int new_clusters)
92 {
93 	int ret = 0;
94 	struct ocfs2_super *osb = OCFS2_SB(bm_inode->i_sb);
95 	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bm_bh->b_data;
96 	struct ocfs2_chain_list *cl = &fe->id2.i_chain;
97 	struct ocfs2_chain_rec *cr;
98 	struct ocfs2_group_desc *group;
99 	u16 chain, num_bits, backups = 0;
100 	u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
101 	u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
102 
103 	trace_ocfs2_update_last_group_and_inode(new_clusters,
104 						first_new_cluster);
105 
106 	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode),
107 				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
108 	if (ret < 0) {
109 		mlog_errno(ret);
110 		goto out;
111 	}
112 
113 	group = (struct ocfs2_group_desc *)group_bh->b_data;
114 
115 	/* update the group first. */
116 	num_bits = new_clusters * cl_bpc;
117 	le16_add_cpu(&group->bg_bits, num_bits);
118 	le16_add_cpu(&group->bg_free_bits_count, num_bits);
119 
120 	/*
121 	 * check whether there are some new backup superblocks exist in
122 	 * this group and update the group bitmap accordingly.
123 	 */
124 	if (OCFS2_HAS_COMPAT_FEATURE(osb->sb,
125 				     OCFS2_FEATURE_COMPAT_BACKUP_SB)) {
126 		backups = ocfs2_calc_new_backup_super(bm_inode,
127 						     group,
128 						     cl_cpg, 1);
129 		le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
130 	}
131 
132 	ocfs2_journal_dirty(handle, group_bh);
133 
134 	/* update the inode accordingly. */
135 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
136 				      OCFS2_JOURNAL_ACCESS_WRITE);
137 	if (ret < 0) {
138 		mlog_errno(ret);
139 		goto out_rollback;
140 	}
141 
142 	chain = le16_to_cpu(group->bg_chain);
143 	cr = (&cl->cl_recs[chain]);
144 	le32_add_cpu(&cr->c_total, num_bits);
145 	le32_add_cpu(&cr->c_free, num_bits);
146 	le32_add_cpu(&fe->id1.bitmap1.i_total, num_bits);
147 	le32_add_cpu(&fe->i_clusters, new_clusters);
148 
149 	if (backups) {
150 		le32_add_cpu(&cr->c_free, -1 * backups);
151 		le32_add_cpu(&fe->id1.bitmap1.i_used, backups);
152 	}
153 
154 	spin_lock(&OCFS2_I(bm_inode)->ip_lock);
155 	OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
156 	le64_add_cpu(&fe->i_size, (u64)new_clusters << osb->s_clustersize_bits);
157 	spin_unlock(&OCFS2_I(bm_inode)->ip_lock);
158 	i_size_write(bm_inode, le64_to_cpu(fe->i_size));
159 
160 	ocfs2_journal_dirty(handle, bm_bh);
161 
162 out_rollback:
163 	if (ret < 0) {
164 		ocfs2_calc_new_backup_super(bm_inode,
165 					    group,
166 					    cl_cpg, 0);
167 		le16_add_cpu(&group->bg_free_bits_count, backups);
168 		le16_add_cpu(&group->bg_bits, -1 * num_bits);
169 		le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
170 	}
171 out:
172 	if (ret)
173 		mlog_errno(ret);
174 	return ret;
175 }
176 
177 static int update_backups(struct inode * inode, u32 clusters, char *data)
178 {
179 	int i, ret = 0;
180 	u32 cluster;
181 	u64 blkno;
182 	struct buffer_head *backup = NULL;
183 	struct ocfs2_dinode *backup_di = NULL;
184 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
185 
186 	/* calculate the real backups we need to update. */
187 	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
188 		blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
189 		cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
190 		if (cluster > clusters)
191 			break;
192 
193 		ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);
194 		if (ret < 0) {
195 			mlog_errno(ret);
196 			break;
197 		}
198 
199 		memcpy(backup->b_data, data, inode->i_sb->s_blocksize);
200 
201 		backup_di = (struct ocfs2_dinode *)backup->b_data;
202 		backup_di->i_blkno = cpu_to_le64(blkno);
203 
204 		ret = ocfs2_write_super_or_backup(osb, backup);
205 		brelse(backup);
206 		backup = NULL;
207 		if (ret < 0) {
208 			mlog_errno(ret);
209 			break;
210 		}
211 	}
212 
213 	return ret;
214 }
215 
216 static void ocfs2_update_super_and_backups(struct inode *inode,
217 					   int new_clusters)
218 {
219 	int ret;
220 	u32 clusters = 0;
221 	struct buffer_head *super_bh = NULL;
222 	struct ocfs2_dinode *super_di = NULL;
223 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
224 
225 	/*
226 	 * update the superblock last.
227 	 * It doesn't matter if the write failed.
228 	 */
229 	ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1,
230 				     &super_bh);
231 	if (ret < 0) {
232 		mlog_errno(ret);
233 		goto out;
234 	}
235 
236 	super_di = (struct ocfs2_dinode *)super_bh->b_data;
237 	le32_add_cpu(&super_di->i_clusters, new_clusters);
238 	clusters = le32_to_cpu(super_di->i_clusters);
239 
240 	ret = ocfs2_write_super_or_backup(osb, super_bh);
241 	if (ret < 0) {
242 		mlog_errno(ret);
243 		goto out;
244 	}
245 
246 	if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_BACKUP_SB))
247 		ret = update_backups(inode, clusters, super_bh->b_data);
248 
249 out:
250 	brelse(super_bh);
251 	if (ret)
252 		printk(KERN_WARNING "ocfs2: Failed to update super blocks on %s"
253 			" during fs resize. This condition is not fatal,"
254 			" but fsck.ocfs2 should be run to fix it\n",
255 			osb->dev_str);
256 	return;
257 }
258 
259 /*
260  * Extend the filesystem to the new number of clusters specified.  This entry
261  * point is only used to extend the current filesystem to the end of the last
262  * existing group.
263  */
264 int ocfs2_group_extend(struct inode * inode, int new_clusters)
265 {
266 	int ret;
267 	handle_t *handle;
268 	struct buffer_head *main_bm_bh = NULL;
269 	struct buffer_head *group_bh = NULL;
270 	struct inode *main_bm_inode = NULL;
271 	struct ocfs2_dinode *fe = NULL;
272 	struct ocfs2_group_desc *group = NULL;
273 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
274 	u16 cl_bpc;
275 	u32 first_new_cluster;
276 	u64 lgd_blkno;
277 
278 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
279 		return -EROFS;
280 
281 	if (new_clusters < 0)
282 		return -EINVAL;
283 	else if (new_clusters == 0)
284 		return 0;
285 
286 	main_bm_inode = ocfs2_get_system_file_inode(osb,
287 						    GLOBAL_BITMAP_SYSTEM_INODE,
288 						    OCFS2_INVALID_SLOT);
289 	if (!main_bm_inode) {
290 		ret = -EINVAL;
291 		mlog_errno(ret);
292 		goto out;
293 	}
294 
295 	mutex_lock(&main_bm_inode->i_mutex);
296 
297 	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
298 	if (ret < 0) {
299 		mlog_errno(ret);
300 		goto out_mutex;
301 	}
302 
303 	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
304 
305 	/* main_bm_bh is validated by inode read inside ocfs2_inode_lock(),
306 	 * so any corruption is a code bug. */
307 	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
308 
309 	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
310 		ocfs2_group_bitmap_size(osb->sb, 0,
311 					osb->s_feature_incompat) * 8) {
312 		mlog(ML_ERROR, "The disk is too old and small. "
313 		     "Force to do offline resize.");
314 		ret = -EINVAL;
315 		goto out_unlock;
316 	}
317 
318 	first_new_cluster = le32_to_cpu(fe->i_clusters);
319 	lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
320 					      first_new_cluster - 1);
321 
322 	ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno,
323 					  &group_bh);
324 	if (ret < 0) {
325 		mlog_errno(ret);
326 		goto out_unlock;
327 	}
328 	group = (struct ocfs2_group_desc *)group_bh->b_data;
329 
330 	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
331 	if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
332 		le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
333 		ret = -EINVAL;
334 		goto out_unlock;
335 	}
336 
337 
338 	trace_ocfs2_group_extend(
339 	     (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters);
340 
341 	handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS);
342 	if (IS_ERR(handle)) {
343 		mlog_errno(PTR_ERR(handle));
344 		ret = -EINVAL;
345 		goto out_unlock;
346 	}
347 
348 	/* update the last group descriptor and inode. */
349 	ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode,
350 						main_bm_bh, group_bh,
351 						first_new_cluster,
352 						new_clusters);
353 	if (ret) {
354 		mlog_errno(ret);
355 		goto out_commit;
356 	}
357 
358 	ocfs2_update_super_and_backups(main_bm_inode, new_clusters);
359 
360 out_commit:
361 	ocfs2_commit_trans(osb, handle);
362 out_unlock:
363 	brelse(group_bh);
364 	brelse(main_bm_bh);
365 
366 	ocfs2_inode_unlock(main_bm_inode, 1);
367 
368 out_mutex:
369 	mutex_unlock(&main_bm_inode->i_mutex);
370 	iput(main_bm_inode);
371 
372 out:
373 	return ret;
374 }
375 
376 static int ocfs2_check_new_group(struct inode *inode,
377 				 struct ocfs2_dinode *di,
378 				 struct ocfs2_new_group_input *input,
379 				 struct buffer_head *group_bh)
380 {
381 	int ret;
382 	struct ocfs2_group_desc *gd =
383 		(struct ocfs2_group_desc *)group_bh->b_data;
384 	u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
385 
386 	ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
387 	if (ret)
388 		goto out;
389 
390 	ret = -EINVAL;
391 	if (le16_to_cpu(gd->bg_chain) != input->chain)
392 		mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
393 		     "while input has %u set.\n",
394 		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
395 		     le16_to_cpu(gd->bg_chain), input->chain);
396 	else if (le16_to_cpu(gd->bg_bits) != input->clusters * cl_bpc)
397 		mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
398 		     "input has %u clusters set\n",
399 		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
400 		     le16_to_cpu(gd->bg_bits), input->clusters);
401 	else if (le16_to_cpu(gd->bg_free_bits_count) != input->frees * cl_bpc)
402 		mlog(ML_ERROR, "Group descriptor # %llu has free bit count %u "
403 		     "but it should have %u set\n",
404 		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
405 		     le16_to_cpu(gd->bg_bits),
406 		     input->frees * cl_bpc);
407 	else
408 		ret = 0;
409 
410 out:
411 	return ret;
412 }
413 
414 static int ocfs2_verify_group_and_input(struct inode *inode,
415 					struct ocfs2_dinode *di,
416 					struct ocfs2_new_group_input *input,
417 					struct buffer_head *group_bh)
418 {
419 	u16 cl_count = le16_to_cpu(di->id2.i_chain.cl_count);
420 	u16 cl_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
421 	u16 next_free = le16_to_cpu(di->id2.i_chain.cl_next_free_rec);
422 	u32 cluster = ocfs2_blocks_to_clusters(inode->i_sb, input->group);
423 	u32 total_clusters = le32_to_cpu(di->i_clusters);
424 	int ret = -EINVAL;
425 
426 	if (cluster < total_clusters)
427 		mlog(ML_ERROR, "add a group which is in the current volume.\n");
428 	else if (input->chain >= cl_count)
429 		mlog(ML_ERROR, "input chain exceeds the limit.\n");
430 	else if (next_free != cl_count && next_free != input->chain)
431 		mlog(ML_ERROR,
432 		     "the add group should be in chain %u\n", next_free);
433 	else if (total_clusters + input->clusters < total_clusters)
434 		mlog(ML_ERROR, "add group's clusters overflow.\n");
435 	else if (input->clusters > cl_cpg)
436 		mlog(ML_ERROR, "the cluster exceeds the maximum of a group\n");
437 	else if (input->frees > input->clusters)
438 		mlog(ML_ERROR, "the free cluster exceeds the total clusters\n");
439 	else if (total_clusters % cl_cpg != 0)
440 		mlog(ML_ERROR,
441 		     "the last group isn't full. Use group extend first.\n");
442 	else if (input->group != ocfs2_which_cluster_group(inode, cluster))
443 		mlog(ML_ERROR, "group blkno is invalid\n");
444 	else if ((ret = ocfs2_check_new_group(inode, di, input, group_bh)))
445 		mlog(ML_ERROR, "group descriptor check failed.\n");
446 	else
447 		ret = 0;
448 
449 	return ret;
450 }
451 
452 /* Add a new group descriptor to global_bitmap. */
453 int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
454 {
455 	int ret;
456 	handle_t *handle;
457 	struct buffer_head *main_bm_bh = NULL;
458 	struct inode *main_bm_inode = NULL;
459 	struct ocfs2_dinode *fe = NULL;
460 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
461 	struct buffer_head *group_bh = NULL;
462 	struct ocfs2_group_desc *group = NULL;
463 	struct ocfs2_chain_list *cl;
464 	struct ocfs2_chain_rec *cr;
465 	u16 cl_bpc;
466 	u64 bg_ptr;
467 
468 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
469 		return -EROFS;
470 
471 	main_bm_inode = ocfs2_get_system_file_inode(osb,
472 						    GLOBAL_BITMAP_SYSTEM_INODE,
473 						    OCFS2_INVALID_SLOT);
474 	if (!main_bm_inode) {
475 		ret = -EINVAL;
476 		mlog_errno(ret);
477 		goto out;
478 	}
479 
480 	mutex_lock(&main_bm_inode->i_mutex);
481 
482 	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
483 	if (ret < 0) {
484 		mlog_errno(ret);
485 		goto out_mutex;
486 	}
487 
488 	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
489 
490 	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
491 		ocfs2_group_bitmap_size(osb->sb, 0,
492 					osb->s_feature_incompat) * 8) {
493 		mlog(ML_ERROR, "The disk is too old and small."
494 		     " Force to do offline resize.");
495 		ret = -EINVAL;
496 		goto out_unlock;
497 	}
498 
499 	ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh);
500 	if (ret < 0) {
501 		mlog(ML_ERROR, "Can't read the group descriptor # %llu "
502 		     "from the device.", (unsigned long long)input->group);
503 		goto out_unlock;
504 	}
505 
506 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), group_bh);
507 
508 	ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
509 	if (ret) {
510 		mlog_errno(ret);
511 		goto out_free_group_bh;
512 	}
513 
514 	trace_ocfs2_group_add((unsigned long long)input->group,
515 			       input->chain, input->clusters, input->frees);
516 
517 	handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS);
518 	if (IS_ERR(handle)) {
519 		mlog_errno(PTR_ERR(handle));
520 		ret = -EINVAL;
521 		goto out_free_group_bh;
522 	}
523 
524 	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
525 	cl = &fe->id2.i_chain;
526 	cr = &cl->cl_recs[input->chain];
527 
528 	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(main_bm_inode),
529 				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
530 	if (ret < 0) {
531 		mlog_errno(ret);
532 		goto out_commit;
533 	}
534 
535 	group = (struct ocfs2_group_desc *)group_bh->b_data;
536 	bg_ptr = le64_to_cpu(group->bg_next_group);
537 	group->bg_next_group = cr->c_blkno;
538 	ocfs2_journal_dirty(handle, group_bh);
539 
540 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
541 				      main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
542 	if (ret < 0) {
543 		group->bg_next_group = cpu_to_le64(bg_ptr);
544 		mlog_errno(ret);
545 		goto out_commit;
546 	}
547 
548 	if (input->chain == le16_to_cpu(cl->cl_next_free_rec)) {
549 		le16_add_cpu(&cl->cl_next_free_rec, 1);
550 		memset(cr, 0, sizeof(struct ocfs2_chain_rec));
551 	}
552 
553 	cr->c_blkno = cpu_to_le64(input->group);
554 	le32_add_cpu(&cr->c_total, input->clusters * cl_bpc);
555 	le32_add_cpu(&cr->c_free, input->frees * cl_bpc);
556 
557 	le32_add_cpu(&fe->id1.bitmap1.i_total, input->clusters *cl_bpc);
558 	le32_add_cpu(&fe->id1.bitmap1.i_used,
559 		     (input->clusters - input->frees) * cl_bpc);
560 	le32_add_cpu(&fe->i_clusters, input->clusters);
561 
562 	ocfs2_journal_dirty(handle, main_bm_bh);
563 
564 	spin_lock(&OCFS2_I(main_bm_inode)->ip_lock);
565 	OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
566 	le64_add_cpu(&fe->i_size, (u64)input->clusters << osb->s_clustersize_bits);
567 	spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock);
568 	i_size_write(main_bm_inode, le64_to_cpu(fe->i_size));
569 
570 	ocfs2_update_super_and_backups(main_bm_inode, input->clusters);
571 
572 out_commit:
573 	ocfs2_commit_trans(osb, handle);
574 
575 out_free_group_bh:
576 	brelse(group_bh);
577 
578 out_unlock:
579 	brelse(main_bm_bh);
580 
581 	ocfs2_inode_unlock(main_bm_inode, 1);
582 
583 out_mutex:
584 	mutex_unlock(&main_bm_inode->i_mutex);
585 	iput(main_bm_inode);
586 
587 out:
588 	return ret;
589 }
590