xref: /linux/fs/ocfs2/slot_map.c (revision 32a92f8c89326985e05dce8b22d3f0aa07a3e1bd)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * slot_map.c
4  *
5  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
6  */
7 
8 #include <linux/types.h>
9 #include <linux/slab.h>
10 #include <linux/highmem.h>
11 
12 #include <cluster/masklog.h>
13 
14 #include "ocfs2.h"
15 
16 #include "dlmglue.h"
17 #include "extent_map.h"
18 #include "heartbeat.h"
19 #include "inode.h"
20 #include "slot_map.h"
21 #include "super.h"
22 #include "sysfile.h"
23 #include "ocfs2_trace.h"
24 
25 #include "buffer_head_io.h"
26 
27 
/* In-memory copy of one entry of the slot map. */
struct ocfs2_slot {
	/* Nonzero while the slot is assigned to a node, zero when free. */
	int sl_valid;
	/* Node number occupying the slot; written together with sl_valid. */
	unsigned int sl_node_num;
};
32 
33 struct ocfs2_slot_info {
34 	int si_extended;
35 	int si_slots_per_block;
36 	struct inode *si_inode;
37 	unsigned int si_blocks;
38 	struct buffer_head **si_bh;
39 	unsigned int si_num_slots;
40 	struct ocfs2_slot si_slots[] __counted_by(si_num_slots);
41 };
42 
43 
/* Forward declarations of static helpers defined later in this file. */
static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
				    unsigned int node_num);
static int ocfs2_validate_slot_map_block(struct super_block *sb,
					  struct buffer_head *bh);
49 
ocfs2_invalidate_slot(struct ocfs2_slot_info * si,int slot_num)50 static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
51 				  int slot_num)
52 {
53 	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
54 	si->si_slots[slot_num].sl_valid = 0;
55 }
56 
ocfs2_set_slot(struct ocfs2_slot_info * si,int slot_num,unsigned int node_num)57 static void ocfs2_set_slot(struct ocfs2_slot_info *si,
58 			   int slot_num, unsigned int node_num)
59 {
60 	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
61 
62 	si->si_slots[slot_num].sl_valid = 1;
63 	si->si_slots[slot_num].sl_node_num = node_num;
64 }
65 
66 /* This version is for the extended slot map */
ocfs2_update_slot_info_extended(struct ocfs2_slot_info * si)67 static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
68 {
69 	int b, i, slotno;
70 	struct ocfs2_slot_map_extended *se;
71 
72 	slotno = 0;
73 	for (b = 0; b < si->si_blocks; b++) {
74 		se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
75 		for (i = 0;
76 		     (i < si->si_slots_per_block) &&
77 		     (slotno < si->si_num_slots);
78 		     i++, slotno++) {
79 			if (se->se_slots[i].es_valid)
80 				ocfs2_set_slot(si, slotno,
81 					       le32_to_cpu(se->se_slots[i].es_node_num));
82 			else
83 				ocfs2_invalidate_slot(si, slotno);
84 		}
85 	}
86 }
87 
88 /*
89  * Post the slot information on disk into our slot_info struct.
90  * Must be protected by osb_lock.
91  */
ocfs2_update_slot_info_old(struct ocfs2_slot_info * si)92 static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
93 {
94 	int i;
95 	struct ocfs2_slot_map *sm;
96 
97 	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
98 
99 	for (i = 0; i < si->si_num_slots; i++) {
100 		if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
101 			ocfs2_invalidate_slot(si, i);
102 		else
103 			ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
104 	}
105 }
106 
ocfs2_update_slot_info(struct ocfs2_slot_info * si)107 static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
108 {
109 	/*
110 	 * The slot data will have been refreshed when ocfs2_super_lock
111 	 * was taken.
112 	 */
113 	if (si->si_extended)
114 		ocfs2_update_slot_info_extended(si);
115 	else
116 		ocfs2_update_slot_info_old(si);
117 }
118 
ocfs2_refresh_slot_info(struct ocfs2_super * osb)119 int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
120 {
121 	int ret;
122 	struct ocfs2_slot_info *si = osb->slot_info;
123 
124 	if (si == NULL)
125 		return 0;
126 
127 	BUG_ON(si->si_blocks == 0);
128 	BUG_ON(si->si_bh == NULL);
129 
130 	trace_ocfs2_refresh_slot_info(si->si_blocks);
131 
132 	/*
133 	 * We pass -1 as blocknr because we expect all of si->si_bh to
134 	 * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
135 	 * this is not true, the read of -1 (UINT64_MAX) will fail.
136 	 */
137 	ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
138 				si->si_bh, OCFS2_BH_IGNORE_CACHE,
139 				ocfs2_validate_slot_map_block);
140 	if (ret == 0) {
141 		spin_lock(&osb->osb_lock);
142 		ocfs2_update_slot_info(si);
143 		spin_unlock(&osb->osb_lock);
144 	}
145 
146 	return ret;
147 }
148 
149 /* post the our slot info stuff into it's destination bh and write it
150  * out. */
ocfs2_update_disk_slot_extended(struct ocfs2_slot_info * si,int slot_num,struct buffer_head ** bh)151 static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
152 					    int slot_num,
153 					    struct buffer_head **bh)
154 {
155 	int blkind = slot_num / si->si_slots_per_block;
156 	int slotno = slot_num % si->si_slots_per_block;
157 	struct ocfs2_slot_map_extended *se;
158 
159 	BUG_ON(blkind >= si->si_blocks);
160 
161 	se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
162 	se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
163 	if (si->si_slots[slot_num].sl_valid)
164 		se->se_slots[slotno].es_node_num =
165 			cpu_to_le32(si->si_slots[slot_num].sl_node_num);
166 	*bh = si->si_bh[blkind];
167 }
168 
ocfs2_update_disk_slot_old(struct ocfs2_slot_info * si,int slot_num,struct buffer_head ** bh)169 static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
170 				       int slot_num,
171 				       struct buffer_head **bh)
172 {
173 	int i;
174 	struct ocfs2_slot_map *sm;
175 
176 	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
177 	for (i = 0; i < si->si_num_slots; i++) {
178 		if (si->si_slots[i].sl_valid)
179 			sm->sm_slots[i] =
180 				cpu_to_le16(si->si_slots[i].sl_node_num);
181 		else
182 			sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
183 	}
184 	*bh = si->si_bh[0];
185 }
186 
ocfs2_update_disk_slot(struct ocfs2_super * osb,struct ocfs2_slot_info * si,int slot_num)187 static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
188 				  struct ocfs2_slot_info *si,
189 				  int slot_num)
190 {
191 	int status;
192 	struct buffer_head *bh;
193 
194 	spin_lock(&osb->osb_lock);
195 	if (si->si_extended)
196 		ocfs2_update_disk_slot_extended(si, slot_num, &bh);
197 	else
198 		ocfs2_update_disk_slot_old(si, slot_num, &bh);
199 	spin_unlock(&osb->osb_lock);
200 
201 	status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
202 	if (status < 0)
203 		mlog_errno(status);
204 
205 	return status;
206 }
207 
208 /*
209  * Calculate how many bytes are needed by the slot map.  Returns
210  * an error if the slot map file is too small.
211  */
ocfs2_slot_map_physical_size(struct ocfs2_super * osb,struct inode * inode,unsigned long long * bytes)212 static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
213 					struct inode *inode,
214 					unsigned long long *bytes)
215 {
216 	unsigned long long bytes_needed;
217 
218 	if (ocfs2_uses_extended_slot_map(osb)) {
219 		bytes_needed = osb->max_slots *
220 			sizeof(struct ocfs2_extended_slot);
221 	} else {
222 		bytes_needed = osb->max_slots * sizeof(__le16);
223 	}
224 	if (bytes_needed > i_size_read(inode)) {
225 		mlog(ML_ERROR,
226 		     "Slot map file is too small!  (size %llu, needed %llu)\n",
227 		     i_size_read(inode), bytes_needed);
228 		return -ENOSPC;
229 	}
230 
231 	*bytes = bytes_needed;
232 	return 0;
233 }
234 
235 /* try to find global node in the slot info. Returns -ENOENT
236  * if nothing is found. */
__ocfs2_node_num_to_slot(struct ocfs2_slot_info * si,unsigned int node_num)237 static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
238 				    unsigned int node_num)
239 {
240 	int i, ret = -ENOENT;
241 
242 	for(i = 0; i < si->si_num_slots; i++) {
243 		if (si->si_slots[i].sl_valid &&
244 		    (node_num == si->si_slots[i].sl_node_num)) {
245 			ret = i;
246 			break;
247 		}
248 	}
249 
250 	return ret;
251 }
252 
__ocfs2_find_empty_slot(struct ocfs2_slot_info * si,int preferred)253 static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
254 				   int preferred)
255 {
256 	int i, ret = -ENOSPC;
257 
258 	if ((preferred >= 0) && (preferred < si->si_num_slots)) {
259 		if (!si->si_slots[preferred].sl_valid) {
260 			ret = preferred;
261 			goto out;
262 		}
263 	}
264 
265 	for(i = 0; i < si->si_num_slots; i++) {
266 		if (!si->si_slots[i].sl_valid) {
267 			ret = i;
268 			break;
269 		}
270 	}
271 out:
272 	return ret;
273 }
274 
ocfs2_node_num_to_slot(struct ocfs2_super * osb,unsigned int node_num)275 int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
276 {
277 	int slot;
278 	struct ocfs2_slot_info *si = osb->slot_info;
279 
280 	spin_lock(&osb->osb_lock);
281 	slot = __ocfs2_node_num_to_slot(si, node_num);
282 	spin_unlock(&osb->osb_lock);
283 
284 	return slot;
285 }
286 
ocfs2_slot_to_node_num_locked(struct ocfs2_super * osb,int slot_num,unsigned int * node_num)287 int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
288 				  unsigned int *node_num)
289 {
290 	struct ocfs2_slot_info *si = osb->slot_info;
291 
292 	assert_spin_locked(&osb->osb_lock);
293 
294 	BUG_ON(slot_num < 0);
295 	BUG_ON(slot_num >= osb->max_slots);
296 
297 	if (!si->si_slots[slot_num].sl_valid)
298 		return -ENOENT;
299 
300 	*node_num = si->si_slots[slot_num].sl_node_num;
301 	return 0;
302 }
303 
__ocfs2_free_slot_info(struct ocfs2_slot_info * si)304 static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
305 {
306 	unsigned int i;
307 
308 	if (si == NULL)
309 		return;
310 
311 	iput(si->si_inode);
312 	if (si->si_bh) {
313 		for (i = 0; i < si->si_blocks; i++) {
314 			if (si->si_bh[i]) {
315 				brelse(si->si_bh[i]);
316 				si->si_bh[i] = NULL;
317 			}
318 		}
319 		kfree(si->si_bh);
320 	}
321 
322 	kfree(si);
323 }
324 
ocfs2_clear_slot(struct ocfs2_super * osb,int slot_num)325 int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
326 {
327 	struct ocfs2_slot_info *si = osb->slot_info;
328 
329 	if (si == NULL)
330 		return 0;
331 
332 	spin_lock(&osb->osb_lock);
333 	ocfs2_invalidate_slot(si, slot_num);
334 	spin_unlock(&osb->osb_lock);
335 
336 	return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
337 }
338 
ocfs2_validate_slot_map_block(struct super_block * sb,struct buffer_head * bh)339 static int ocfs2_validate_slot_map_block(struct super_block *sb,
340 					  struct buffer_head *bh)
341 {
342 	int rc;
343 
344 	BUG_ON(!buffer_uptodate(bh));
345 
346 	if (bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO) {
347 		rc = ocfs2_error(sb,
348 				 "Invalid Slot Map Buffer Head "
349 				 "Block Number : %llu, Should be >= %d",
350 				 (unsigned long long)bh->b_blocknr,
351 				 OCFS2_SUPER_BLOCK_BLKNO);
352 		return rc;
353 	}
354 	return 0;
355 }
356 
ocfs2_map_slot_buffers(struct ocfs2_super * osb,struct ocfs2_slot_info * si)357 static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
358 				  struct ocfs2_slot_info *si)
359 {
360 	int status = 0;
361 	u64 blkno;
362 	unsigned long long blocks, bytes = 0;
363 	unsigned int i;
364 	struct buffer_head *bh;
365 
366 	status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
367 	if (status)
368 		goto bail;
369 
370 	blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
371 	BUG_ON(blocks > UINT_MAX);
372 	si->si_blocks = blocks;
373 	if (!si->si_blocks)
374 		goto bail;
375 
376 	if (si->si_extended)
377 		si->si_slots_per_block =
378 			(osb->sb->s_blocksize /
379 			 sizeof(struct ocfs2_extended_slot));
380 	else
381 		si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
382 
383 	/* The size checks above should ensure this */
384 	BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
385 
386 	trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
387 
388 	si->si_bh = kzalloc_objs(struct buffer_head *, si->si_blocks);
389 	if (!si->si_bh) {
390 		status = -ENOMEM;
391 		mlog_errno(status);
392 		goto bail;
393 	}
394 
395 	for (i = 0; i < si->si_blocks; i++) {
396 		status = ocfs2_extent_map_get_blocks(si->si_inode, i,
397 						     &blkno, NULL, NULL);
398 		if (status < 0) {
399 			mlog_errno(status);
400 			goto bail;
401 		}
402 
403 		trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
404 
405 		bh = NULL;  /* Acquire a fresh bh */
406 		status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
407 					   1, &bh, OCFS2_BH_IGNORE_CACHE,
408 					   ocfs2_validate_slot_map_block);
409 		if (status < 0) {
410 			mlog_errno(status);
411 			goto bail;
412 		}
413 
414 		si->si_bh[i] = bh;
415 	}
416 
417 bail:
418 	return status;
419 }
420 
ocfs2_init_slot_info(struct ocfs2_super * osb)421 int ocfs2_init_slot_info(struct ocfs2_super *osb)
422 {
423 	int status;
424 	struct inode *inode = NULL;
425 	struct ocfs2_slot_info *si;
426 
427 	si = kzalloc_flex(*si, si_slots, osb->max_slots);
428 	if (!si) {
429 		status = -ENOMEM;
430 		mlog_errno(status);
431 		return status;
432 	}
433 
434 	si->si_extended = ocfs2_uses_extended_slot_map(osb);
435 	si->si_num_slots = osb->max_slots;
436 
437 	inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
438 					    OCFS2_INVALID_SLOT);
439 	if (!inode) {
440 		status = -EINVAL;
441 		mlog_errno(status);
442 		goto bail;
443 	}
444 
445 	si->si_inode = inode;
446 	status = ocfs2_map_slot_buffers(osb, si);
447 	if (status < 0) {
448 		mlog_errno(status);
449 		goto bail;
450 	}
451 
452 	osb->slot_info = (struct ocfs2_slot_info *)si;
453 bail:
454 	if (status < 0)
455 		__ocfs2_free_slot_info(si);
456 
457 	return status;
458 }
459 
ocfs2_free_slot_info(struct ocfs2_super * osb)460 void ocfs2_free_slot_info(struct ocfs2_super *osb)
461 {
462 	struct ocfs2_slot_info *si = osb->slot_info;
463 
464 	osb->slot_info = NULL;
465 	__ocfs2_free_slot_info(si);
466 }
467 
ocfs2_find_slot(struct ocfs2_super * osb)468 int ocfs2_find_slot(struct ocfs2_super *osb)
469 {
470 	int status;
471 	int slot;
472 	struct ocfs2_slot_info *si;
473 
474 	si = osb->slot_info;
475 
476 	spin_lock(&osb->osb_lock);
477 	ocfs2_update_slot_info(si);
478 
479 	/* search for ourselves first and take the slot if it already
480 	 * exists. Perhaps we need to mark this in a variable for our
481 	 * own journal recovery? Possibly not, though we certainly
482 	 * need to warn to the user */
483 	slot = __ocfs2_node_num_to_slot(si, osb->node_num);
484 	if (slot < 0) {
485 		/* if no slot yet, then just take 1st available
486 		 * one. */
487 		slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
488 		if (slot < 0) {
489 			spin_unlock(&osb->osb_lock);
490 			mlog(ML_ERROR, "no free slots available!\n");
491 			status = -EINVAL;
492 			goto bail;
493 		}
494 	} else
495 		printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
496 		       "allocated to this node!\n", slot, osb->dev_str);
497 
498 	ocfs2_set_slot(si, slot, osb->node_num);
499 	osb->slot_num = slot;
500 	spin_unlock(&osb->osb_lock);
501 
502 	trace_ocfs2_find_slot(osb->slot_num);
503 
504 	status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
505 	if (status < 0) {
506 		mlog_errno(status);
507 		/*
508 		 * if write block failed, invalidate slot to avoid overwrite
509 		 * slot during dismount in case another node rightly has mounted
510 		 */
511 		spin_lock(&osb->osb_lock);
512 		ocfs2_invalidate_slot(si, osb->slot_num);
513 		osb->slot_num = OCFS2_INVALID_SLOT;
514 		spin_unlock(&osb->osb_lock);
515 	}
516 
517 bail:
518 	return status;
519 }
520 
ocfs2_put_slot(struct ocfs2_super * osb)521 void ocfs2_put_slot(struct ocfs2_super *osb)
522 {
523 	int status, slot_num;
524 	struct ocfs2_slot_info *si = osb->slot_info;
525 
526 	if (!si)
527 		return;
528 
529 	spin_lock(&osb->osb_lock);
530 	ocfs2_update_slot_info(si);
531 
532 	slot_num = osb->slot_num;
533 	ocfs2_invalidate_slot(si, osb->slot_num);
534 	osb->slot_num = OCFS2_INVALID_SLOT;
535 	spin_unlock(&osb->osb_lock);
536 
537 	status = ocfs2_update_disk_slot(osb, si, slot_num);
538 	if (status < 0)
539 		mlog_errno(status);
540 
541 	ocfs2_free_slot_info(osb);
542 }
543