xref: /linux/fs/ocfs2/resize.c (revision f9bff0e31881d03badf191d3b0005839391f5f2b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * resize.c
4  *
5  * volume resize.
6  * Inspired by ext3/resize.c.
7  *
8  * Copyright (C) 2007 Oracle.  All rights reserved.
9  */
10 
11 #include <linux/fs.h>
12 #include <linux/types.h>
13 
14 #include <cluster/masklog.h>
15 
16 #include "ocfs2.h"
17 
18 #include "alloc.h"
19 #include "dlmglue.h"
20 #include "inode.h"
21 #include "journal.h"
22 #include "super.h"
23 #include "sysfile.h"
24 #include "uptodate.h"
25 #include "ocfs2_trace.h"
26 
27 #include "buffer_head_io.h"
28 #include "suballoc.h"
29 #include "resize.h"
30 
31 /*
32  * Check whether there are new backup superblocks exist
33  * in the last group. If there are some, mark them or clear
34  * them in the bitmap.
35  *
36  * Return how many backups we find in the last group.
37  */
38 static u16 ocfs2_calc_new_backup_super(struct inode *inode,
39 				       struct ocfs2_group_desc *gd,
40 				       u16 cl_cpg,
41 				       u16 old_bg_clusters,
42 				       int set)
43 {
44 	int i;
45 	u16 backups = 0;
46 	u32 cluster, lgd_cluster;
47 	u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno);
48 
49 	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
50 		blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
51 		cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
52 
53 		gd_blkno = ocfs2_which_cluster_group(inode, cluster);
54 		if (gd_blkno < lgd_blkno)
55 			continue;
56 		else if (gd_blkno > lgd_blkno)
57 			break;
58 
59 		/* check if already done backup super */
60 		lgd_cluster = ocfs2_blocks_to_clusters(inode->i_sb, lgd_blkno);
61 		lgd_cluster += old_bg_clusters;
62 		if (lgd_cluster >= cluster)
63 			continue;
64 
65 		if (set)
66 			ocfs2_set_bit(cluster % cl_cpg,
67 				      (unsigned long *)gd->bg_bitmap);
68 		else
69 			ocfs2_clear_bit(cluster % cl_cpg,
70 					(unsigned long *)gd->bg_bitmap);
71 		backups++;
72 	}
73 
74 	return backups;
75 }
76 
77 static int ocfs2_update_last_group_and_inode(handle_t *handle,
78 					     struct inode *bm_inode,
79 					     struct buffer_head *bm_bh,
80 					     struct buffer_head *group_bh,
81 					     u32 first_new_cluster,
82 					     int new_clusters)
83 {
84 	int ret = 0;
85 	struct ocfs2_super *osb = OCFS2_SB(bm_inode->i_sb);
86 	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bm_bh->b_data;
87 	struct ocfs2_chain_list *cl = &fe->id2.i_chain;
88 	struct ocfs2_chain_rec *cr;
89 	struct ocfs2_group_desc *group;
90 	u16 chain, num_bits, backups = 0;
91 	u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
92 	u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
93 	u16 old_bg_clusters;
94 
95 	trace_ocfs2_update_last_group_and_inode(new_clusters,
96 						first_new_cluster);
97 
98 	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode),
99 				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
100 	if (ret < 0) {
101 		mlog_errno(ret);
102 		goto out;
103 	}
104 
105 	group = (struct ocfs2_group_desc *)group_bh->b_data;
106 
107 	old_bg_clusters = le16_to_cpu(group->bg_bits) / cl_bpc;
108 	/* update the group first. */
109 	num_bits = new_clusters * cl_bpc;
110 	le16_add_cpu(&group->bg_bits, num_bits);
111 	le16_add_cpu(&group->bg_free_bits_count, num_bits);
112 
113 	/*
114 	 * check whether there are some new backup superblocks exist in
115 	 * this group and update the group bitmap accordingly.
116 	 */
117 	if (OCFS2_HAS_COMPAT_FEATURE(osb->sb,
118 				     OCFS2_FEATURE_COMPAT_BACKUP_SB)) {
119 		backups = ocfs2_calc_new_backup_super(bm_inode,
120 						     group,
121 						     cl_cpg, old_bg_clusters, 1);
122 		le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
123 	}
124 
125 	ocfs2_journal_dirty(handle, group_bh);
126 
127 	/* update the inode accordingly. */
128 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
129 				      OCFS2_JOURNAL_ACCESS_WRITE);
130 	if (ret < 0) {
131 		mlog_errno(ret);
132 		goto out_rollback;
133 	}
134 
135 	chain = le16_to_cpu(group->bg_chain);
136 	cr = (&cl->cl_recs[chain]);
137 	le32_add_cpu(&cr->c_total, num_bits);
138 	le32_add_cpu(&cr->c_free, num_bits);
139 	le32_add_cpu(&fe->id1.bitmap1.i_total, num_bits);
140 	le32_add_cpu(&fe->i_clusters, new_clusters);
141 
142 	if (backups) {
143 		le32_add_cpu(&cr->c_free, -1 * backups);
144 		le32_add_cpu(&fe->id1.bitmap1.i_used, backups);
145 	}
146 
147 	spin_lock(&OCFS2_I(bm_inode)->ip_lock);
148 	OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
149 	le64_add_cpu(&fe->i_size, (u64)new_clusters << osb->s_clustersize_bits);
150 	spin_unlock(&OCFS2_I(bm_inode)->ip_lock);
151 	i_size_write(bm_inode, le64_to_cpu(fe->i_size));
152 
153 	ocfs2_journal_dirty(handle, bm_bh);
154 
155 out_rollback:
156 	if (ret < 0) {
157 		ocfs2_calc_new_backup_super(bm_inode,
158 					    group,
159 					    cl_cpg, old_bg_clusters, 0);
160 		le16_add_cpu(&group->bg_free_bits_count, backups);
161 		le16_add_cpu(&group->bg_bits, -1 * num_bits);
162 		le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
163 	}
164 out:
165 	if (ret)
166 		mlog_errno(ret);
167 	return ret;
168 }
169 
170 static int update_backups(struct inode * inode, u32 clusters, char *data)
171 {
172 	int i, ret = 0;
173 	u32 cluster;
174 	u64 blkno;
175 	struct buffer_head *backup = NULL;
176 	struct ocfs2_dinode *backup_di = NULL;
177 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
178 
179 	/* calculate the real backups we need to update. */
180 	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
181 		blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
182 		cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
183 		if (cluster >= clusters)
184 			break;
185 
186 		ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);
187 		if (ret < 0) {
188 			mlog_errno(ret);
189 			break;
190 		}
191 
192 		memcpy(backup->b_data, data, inode->i_sb->s_blocksize);
193 
194 		backup_di = (struct ocfs2_dinode *)backup->b_data;
195 		backup_di->i_blkno = cpu_to_le64(blkno);
196 
197 		ret = ocfs2_write_super_or_backup(osb, backup);
198 		brelse(backup);
199 		backup = NULL;
200 		if (ret < 0) {
201 			mlog_errno(ret);
202 			break;
203 		}
204 	}
205 
206 	return ret;
207 }
208 
209 static void ocfs2_update_super_and_backups(struct inode *inode,
210 					   int new_clusters)
211 {
212 	int ret;
213 	u32 clusters = 0;
214 	struct buffer_head *super_bh = NULL;
215 	struct ocfs2_dinode *super_di = NULL;
216 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
217 
218 	/*
219 	 * update the superblock last.
220 	 * It doesn't matter if the write failed.
221 	 */
222 	ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1,
223 				     &super_bh);
224 	if (ret < 0) {
225 		mlog_errno(ret);
226 		goto out;
227 	}
228 
229 	super_di = (struct ocfs2_dinode *)super_bh->b_data;
230 	le32_add_cpu(&super_di->i_clusters, new_clusters);
231 	clusters = le32_to_cpu(super_di->i_clusters);
232 
233 	ret = ocfs2_write_super_or_backup(osb, super_bh);
234 	if (ret < 0) {
235 		mlog_errno(ret);
236 		goto out;
237 	}
238 
239 	if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_BACKUP_SB))
240 		ret = update_backups(inode, clusters, super_bh->b_data);
241 
242 out:
243 	brelse(super_bh);
244 	if (ret)
245 		printk(KERN_WARNING "ocfs2: Failed to update super blocks on %s"
246 			" during fs resize. This condition is not fatal,"
247 			" but fsck.ocfs2 should be run to fix it\n",
248 			osb->dev_str);
249 	return;
250 }
251 
252 /*
253  * Extend the filesystem to the new number of clusters specified.  This entry
254  * point is only used to extend the current filesystem to the end of the last
255  * existing group.
256  */
257 int ocfs2_group_extend(struct inode * inode, int new_clusters)
258 {
259 	int ret;
260 	handle_t *handle;
261 	struct buffer_head *main_bm_bh = NULL;
262 	struct buffer_head *group_bh = NULL;
263 	struct inode *main_bm_inode = NULL;
264 	struct ocfs2_dinode *fe = NULL;
265 	struct ocfs2_group_desc *group = NULL;
266 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
267 	u16 cl_bpc;
268 	u32 first_new_cluster;
269 	u64 lgd_blkno;
270 
271 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
272 		return -EROFS;
273 
274 	if (new_clusters < 0)
275 		return -EINVAL;
276 	else if (new_clusters == 0)
277 		return 0;
278 
279 	main_bm_inode = ocfs2_get_system_file_inode(osb,
280 						    GLOBAL_BITMAP_SYSTEM_INODE,
281 						    OCFS2_INVALID_SLOT);
282 	if (!main_bm_inode) {
283 		ret = -EINVAL;
284 		mlog_errno(ret);
285 		goto out;
286 	}
287 
288 	inode_lock(main_bm_inode);
289 
290 	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
291 	if (ret < 0) {
292 		mlog_errno(ret);
293 		goto out_mutex;
294 	}
295 
296 	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
297 
298 	/* main_bm_bh is validated by inode read inside ocfs2_inode_lock(),
299 	 * so any corruption is a code bug. */
300 	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
301 
302 	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
303 		ocfs2_group_bitmap_size(osb->sb, 0,
304 					osb->s_feature_incompat) * 8) {
305 		mlog(ML_ERROR, "The disk is too old and small. "
306 		     "Force to do offline resize.");
307 		ret = -EINVAL;
308 		goto out_unlock;
309 	}
310 
311 	first_new_cluster = le32_to_cpu(fe->i_clusters);
312 	lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
313 					      first_new_cluster - 1);
314 
315 	ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno,
316 					  &group_bh);
317 	if (ret < 0) {
318 		mlog_errno(ret);
319 		goto out_unlock;
320 	}
321 	group = (struct ocfs2_group_desc *)group_bh->b_data;
322 
323 	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
324 	if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
325 		le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
326 		ret = -EINVAL;
327 		goto out_unlock;
328 	}
329 
330 
331 	trace_ocfs2_group_extend(
332 	     (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters);
333 
334 	handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS);
335 	if (IS_ERR(handle)) {
336 		mlog_errno(PTR_ERR(handle));
337 		ret = -EINVAL;
338 		goto out_unlock;
339 	}
340 
341 	/* update the last group descriptor and inode. */
342 	ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode,
343 						main_bm_bh, group_bh,
344 						first_new_cluster,
345 						new_clusters);
346 	if (ret) {
347 		mlog_errno(ret);
348 		goto out_commit;
349 	}
350 
351 	ocfs2_update_super_and_backups(main_bm_inode, new_clusters);
352 
353 out_commit:
354 	ocfs2_commit_trans(osb, handle);
355 out_unlock:
356 	brelse(group_bh);
357 	brelse(main_bm_bh);
358 
359 	ocfs2_inode_unlock(main_bm_inode, 1);
360 
361 out_mutex:
362 	inode_unlock(main_bm_inode);
363 	iput(main_bm_inode);
364 
365 out:
366 	return ret;
367 }
368 
369 static int ocfs2_check_new_group(struct inode *inode,
370 				 struct ocfs2_dinode *di,
371 				 struct ocfs2_new_group_input *input,
372 				 struct buffer_head *group_bh)
373 {
374 	int ret;
375 	struct ocfs2_group_desc *gd =
376 		(struct ocfs2_group_desc *)group_bh->b_data;
377 	u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
378 
379 	ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
380 	if (ret)
381 		goto out;
382 
383 	ret = -EINVAL;
384 	if (le16_to_cpu(gd->bg_chain) != input->chain)
385 		mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
386 		     "while input has %u set.\n",
387 		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
388 		     le16_to_cpu(gd->bg_chain), input->chain);
389 	else if (le16_to_cpu(gd->bg_bits) != input->clusters * cl_bpc)
390 		mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
391 		     "input has %u clusters set\n",
392 		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
393 		     le16_to_cpu(gd->bg_bits), input->clusters);
394 	else if (le16_to_cpu(gd->bg_free_bits_count) != input->frees * cl_bpc)
395 		mlog(ML_ERROR, "Group descriptor # %llu has free bit count %u "
396 		     "but it should have %u set\n",
397 		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
398 		     le16_to_cpu(gd->bg_bits),
399 		     input->frees * cl_bpc);
400 	else
401 		ret = 0;
402 
403 out:
404 	return ret;
405 }
406 
407 static int ocfs2_verify_group_and_input(struct inode *inode,
408 					struct ocfs2_dinode *di,
409 					struct ocfs2_new_group_input *input,
410 					struct buffer_head *group_bh)
411 {
412 	u16 cl_count = le16_to_cpu(di->id2.i_chain.cl_count);
413 	u16 cl_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
414 	u16 next_free = le16_to_cpu(di->id2.i_chain.cl_next_free_rec);
415 	u32 cluster = ocfs2_blocks_to_clusters(inode->i_sb, input->group);
416 	u32 total_clusters = le32_to_cpu(di->i_clusters);
417 	int ret = -EINVAL;
418 
419 	if (cluster < total_clusters)
420 		mlog(ML_ERROR, "add a group which is in the current volume.\n");
421 	else if (input->chain >= cl_count)
422 		mlog(ML_ERROR, "input chain exceeds the limit.\n");
423 	else if (next_free != cl_count && next_free != input->chain)
424 		mlog(ML_ERROR,
425 		     "the add group should be in chain %u\n", next_free);
426 	else if (total_clusters + input->clusters < total_clusters)
427 		mlog(ML_ERROR, "add group's clusters overflow.\n");
428 	else if (input->clusters > cl_cpg)
429 		mlog(ML_ERROR, "the cluster exceeds the maximum of a group\n");
430 	else if (input->frees > input->clusters)
431 		mlog(ML_ERROR, "the free cluster exceeds the total clusters\n");
432 	else if (total_clusters % cl_cpg != 0)
433 		mlog(ML_ERROR,
434 		     "the last group isn't full. Use group extend first.\n");
435 	else if (input->group != ocfs2_which_cluster_group(inode, cluster))
436 		mlog(ML_ERROR, "group blkno is invalid\n");
437 	else if ((ret = ocfs2_check_new_group(inode, di, input, group_bh)))
438 		mlog(ML_ERROR, "group descriptor check failed.\n");
439 	else
440 		ret = 0;
441 
442 	return ret;
443 }
444 
445 /* Add a new group descriptor to global_bitmap. */
446 int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
447 {
448 	int ret;
449 	handle_t *handle;
450 	struct buffer_head *main_bm_bh = NULL;
451 	struct inode *main_bm_inode = NULL;
452 	struct ocfs2_dinode *fe = NULL;
453 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
454 	struct buffer_head *group_bh = NULL;
455 	struct ocfs2_group_desc *group = NULL;
456 	struct ocfs2_chain_list *cl;
457 	struct ocfs2_chain_rec *cr;
458 	u16 cl_bpc;
459 	u64 bg_ptr;
460 
461 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
462 		return -EROFS;
463 
464 	main_bm_inode = ocfs2_get_system_file_inode(osb,
465 						    GLOBAL_BITMAP_SYSTEM_INODE,
466 						    OCFS2_INVALID_SLOT);
467 	if (!main_bm_inode) {
468 		ret = -EINVAL;
469 		mlog_errno(ret);
470 		goto out;
471 	}
472 
473 	inode_lock(main_bm_inode);
474 
475 	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
476 	if (ret < 0) {
477 		mlog_errno(ret);
478 		goto out_mutex;
479 	}
480 
481 	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
482 
483 	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
484 		ocfs2_group_bitmap_size(osb->sb, 0,
485 					osb->s_feature_incompat) * 8) {
486 		mlog(ML_ERROR, "The disk is too old and small."
487 		     " Force to do offline resize.");
488 		ret = -EINVAL;
489 		goto out_unlock;
490 	}
491 
492 	ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh);
493 	if (ret < 0) {
494 		mlog(ML_ERROR, "Can't read the group descriptor # %llu "
495 		     "from the device.", (unsigned long long)input->group);
496 		goto out_unlock;
497 	}
498 
499 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), group_bh);
500 
501 	ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
502 	if (ret) {
503 		mlog_errno(ret);
504 		goto out_free_group_bh;
505 	}
506 
507 	trace_ocfs2_group_add((unsigned long long)input->group,
508 			       input->chain, input->clusters, input->frees);
509 
510 	handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS);
511 	if (IS_ERR(handle)) {
512 		mlog_errno(PTR_ERR(handle));
513 		ret = -EINVAL;
514 		goto out_free_group_bh;
515 	}
516 
517 	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
518 	cl = &fe->id2.i_chain;
519 	cr = &cl->cl_recs[input->chain];
520 
521 	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(main_bm_inode),
522 				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
523 	if (ret < 0) {
524 		mlog_errno(ret);
525 		goto out_commit;
526 	}
527 
528 	group = (struct ocfs2_group_desc *)group_bh->b_data;
529 	bg_ptr = le64_to_cpu(group->bg_next_group);
530 	group->bg_next_group = cr->c_blkno;
531 	ocfs2_journal_dirty(handle, group_bh);
532 
533 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
534 				      main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
535 	if (ret < 0) {
536 		group->bg_next_group = cpu_to_le64(bg_ptr);
537 		mlog_errno(ret);
538 		goto out_commit;
539 	}
540 
541 	if (input->chain == le16_to_cpu(cl->cl_next_free_rec)) {
542 		le16_add_cpu(&cl->cl_next_free_rec, 1);
543 		memset(cr, 0, sizeof(struct ocfs2_chain_rec));
544 	}
545 
546 	cr->c_blkno = cpu_to_le64(input->group);
547 	le32_add_cpu(&cr->c_total, input->clusters * cl_bpc);
548 	le32_add_cpu(&cr->c_free, input->frees * cl_bpc);
549 
550 	le32_add_cpu(&fe->id1.bitmap1.i_total, input->clusters *cl_bpc);
551 	le32_add_cpu(&fe->id1.bitmap1.i_used,
552 		     (input->clusters - input->frees) * cl_bpc);
553 	le32_add_cpu(&fe->i_clusters, input->clusters);
554 
555 	ocfs2_journal_dirty(handle, main_bm_bh);
556 
557 	spin_lock(&OCFS2_I(main_bm_inode)->ip_lock);
558 	OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
559 	le64_add_cpu(&fe->i_size, (u64)input->clusters << osb->s_clustersize_bits);
560 	spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock);
561 	i_size_write(main_bm_inode, le64_to_cpu(fe->i_size));
562 
563 	ocfs2_update_super_and_backups(main_bm_inode, input->clusters);
564 
565 out_commit:
566 	ocfs2_commit_trans(osb, handle);
567 
568 out_free_group_bh:
569 	brelse(group_bh);
570 
571 out_unlock:
572 	brelse(main_bm_bh);
573 
574 	ocfs2_inode_unlock(main_bm_inode, 1);
575 
576 out_mutex:
577 	inode_unlock(main_bm_inode);
578 	iput(main_bm_inode);
579 
580 out:
581 	return ret;
582 }
583