xref: /linux/fs/ocfs2/slot_map.c (revision 08e8f1ef3df270daef4ffc9c4bb15669f72d5d2f)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * slot_map.c
4  *
5  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
6  */
7 
8 #include <linux/types.h>
9 #include <linux/slab.h>
10 #include <linux/highmem.h>
11 
12 #include <cluster/masklog.h>
13 
14 #include "ocfs2.h"
15 
16 #include "dlmglue.h"
17 #include "extent_map.h"
18 #include "heartbeat.h"
19 #include "inode.h"
20 #include "slot_map.h"
21 #include "super.h"
22 #include "sysfile.h"
23 #include "ocfs2_trace.h"
24 
25 #include "buffer_head_io.h"
26 
27 
/* In-memory record for one entry of the slot map. */
struct ocfs2_slot {
	/* Nonzero while sl_node_num below is meaningful. */
	int sl_valid;
	/* Node number recorded for this slot. */
	unsigned int sl_node_num;
};
32 
33 struct ocfs2_slot_info {
34 	int si_extended;
35 	int si_slots_per_block;
36 	struct inode *si_inode;
37 	unsigned int si_blocks;
38 	struct buffer_head **si_bh;
39 	unsigned int si_num_slots;
40 	struct ocfs2_slot si_slots[] __counted_by(si_num_slots);
41 };
42 
43 
/* Forward declarations for helpers defined further down in this file. */
static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
				    unsigned int node_num);
static int ocfs2_validate_slot_map_block(struct super_block *sb,
					  struct buffer_head *bh);
49 
50 static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
51 				  int slot_num)
52 {
53 	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
54 	si->si_slots[slot_num].sl_valid = 0;
55 }
56 
57 static void ocfs2_set_slot(struct ocfs2_slot_info *si,
58 			   int slot_num, unsigned int node_num)
59 {
60 	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
61 
62 	si->si_slots[slot_num].sl_valid = 1;
63 	si->si_slots[slot_num].sl_node_num = node_num;
64 }
65 
66 /* This version is for the extended slot map */
67 static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
68 {
69 	int b, i, slotno;
70 	struct ocfs2_slot_map_extended *se;
71 
72 	slotno = 0;
73 	for (b = 0; b < si->si_blocks; b++) {
74 		se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
75 		for (i = 0;
76 		     (i < si->si_slots_per_block) &&
77 		     (slotno < si->si_num_slots);
78 		     i++, slotno++) {
79 			if (se->se_slots[i].es_valid)
80 				ocfs2_set_slot(si, slotno,
81 					       le32_to_cpu(se->se_slots[i].es_node_num));
82 			else
83 				ocfs2_invalidate_slot(si, slotno);
84 		}
85 	}
86 }
87 
88 /*
89  * Post the slot information on disk into our slot_info struct.
90  * Must be protected by osb_lock.
91  */
92 static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
93 {
94 	int i;
95 	struct ocfs2_slot_map *sm;
96 
97 	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
98 
99 	for (i = 0; i < si->si_num_slots; i++) {
100 		if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
101 			ocfs2_invalidate_slot(si, i);
102 		else
103 			ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
104 	}
105 }
106 
107 static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
108 {
109 	/*
110 	 * The slot data will have been refreshed when ocfs2_super_lock
111 	 * was taken.
112 	 */
113 	if (si->si_extended)
114 		ocfs2_update_slot_info_extended(si);
115 	else
116 		ocfs2_update_slot_info_old(si);
117 }
118 
119 int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
120 {
121 	int ret;
122 	struct ocfs2_slot_info *si = osb->slot_info;
123 
124 	if (si == NULL)
125 		return 0;
126 
127 	BUG_ON(si->si_blocks == 0);
128 	BUG_ON(si->si_bh == NULL);
129 
130 	trace_ocfs2_refresh_slot_info(si->si_blocks);
131 
132 	/*
133 	 * We pass -1 as blocknr because we expect all of si->si_bh to
134 	 * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
135 	 * this is not true, the read of -1 (UINT64_MAX) will fail.
136 	 */
137 	ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
138 				si->si_bh, OCFS2_BH_IGNORE_CACHE,
139 				ocfs2_validate_slot_map_block);
140 	if (ret == 0) {
141 		spin_lock(&osb->osb_lock);
142 		ocfs2_update_slot_info(si);
143 		spin_unlock(&osb->osb_lock);
144 	}
145 
146 	return ret;
147 }
148 
149 /* post the our slot info stuff into it's destination bh and write it
150  * out. */
151 static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
152 					    int slot_num,
153 					    struct buffer_head **bh)
154 {
155 	int blkind = slot_num / si->si_slots_per_block;
156 	int slotno = slot_num % si->si_slots_per_block;
157 	struct ocfs2_slot_map_extended *se;
158 
159 	BUG_ON(blkind >= si->si_blocks);
160 
161 	se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
162 	se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
163 	if (si->si_slots[slot_num].sl_valid)
164 		se->se_slots[slotno].es_node_num =
165 			cpu_to_le32(si->si_slots[slot_num].sl_node_num);
166 	*bh = si->si_bh[blkind];
167 }
168 
169 static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
170 				       int slot_num,
171 				       struct buffer_head **bh)
172 {
173 	int i;
174 	struct ocfs2_slot_map *sm;
175 
176 	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
177 	for (i = 0; i < si->si_num_slots; i++) {
178 		if (si->si_slots[i].sl_valid)
179 			sm->sm_slots[i] =
180 				cpu_to_le16(si->si_slots[i].sl_node_num);
181 		else
182 			sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
183 	}
184 	*bh = si->si_bh[0];
185 }
186 
187 static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
188 				  struct ocfs2_slot_info *si,
189 				  int slot_num)
190 {
191 	int status;
192 	struct buffer_head *bh;
193 
194 	spin_lock(&osb->osb_lock);
195 	if (si->si_extended)
196 		ocfs2_update_disk_slot_extended(si, slot_num, &bh);
197 	else
198 		ocfs2_update_disk_slot_old(si, slot_num, &bh);
199 	spin_unlock(&osb->osb_lock);
200 
201 	status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
202 	if (status < 0)
203 		mlog_errno(status);
204 
205 	return status;
206 }
207 
208 /*
209  * Calculate how many bytes are needed by the slot map.  Returns
210  * an error if the slot map file is too small.
211  */
212 static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
213 					struct inode *inode,
214 					unsigned long long *bytes)
215 {
216 	unsigned long long bytes_needed;
217 
218 	if (ocfs2_uses_extended_slot_map(osb)) {
219 		bytes_needed = osb->max_slots *
220 			sizeof(struct ocfs2_extended_slot);
221 	} else {
222 		bytes_needed = osb->max_slots * sizeof(__le16);
223 	}
224 	if (bytes_needed > i_size_read(inode)) {
225 		mlog(ML_ERROR,
226 		     "Slot map file is too small!  (size %llu, needed %llu)\n",
227 		     i_size_read(inode), bytes_needed);
228 		return -ENOSPC;
229 	}
230 
231 	*bytes = bytes_needed;
232 	return 0;
233 }
234 
235 /* try to find global node in the slot info. Returns -ENOENT
236  * if nothing is found. */
237 static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
238 				    unsigned int node_num)
239 {
240 	int i, ret = -ENOENT;
241 
242 	for(i = 0; i < si->si_num_slots; i++) {
243 		if (si->si_slots[i].sl_valid &&
244 		    (node_num == si->si_slots[i].sl_node_num)) {
245 			ret = i;
246 			break;
247 		}
248 	}
249 
250 	return ret;
251 }
252 
253 static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
254 				   int preferred)
255 {
256 	int i, ret = -ENOSPC;
257 
258 	if ((preferred >= 0) && (preferred < si->si_num_slots)) {
259 		if (!si->si_slots[preferred].sl_valid) {
260 			ret = preferred;
261 			goto out;
262 		}
263 	}
264 
265 	for(i = 0; i < si->si_num_slots; i++) {
266 		if (!si->si_slots[i].sl_valid) {
267 			ret = i;
268 			break;
269 		}
270 	}
271 out:
272 	return ret;
273 }
274 
275 int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
276 {
277 	int slot;
278 	struct ocfs2_slot_info *si = osb->slot_info;
279 
280 	spin_lock(&osb->osb_lock);
281 	slot = __ocfs2_node_num_to_slot(si, node_num);
282 	spin_unlock(&osb->osb_lock);
283 
284 	return slot;
285 }
286 
287 int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
288 				  unsigned int *node_num)
289 {
290 	struct ocfs2_slot_info *si = osb->slot_info;
291 
292 	assert_spin_locked(&osb->osb_lock);
293 
294 	BUG_ON(slot_num < 0);
295 	BUG_ON(slot_num >= osb->max_slots);
296 
297 	if (!si->si_slots[slot_num].sl_valid)
298 		return -ENOENT;
299 
300 	*node_num = si->si_slots[slot_num].sl_node_num;
301 	return 0;
302 }
303 
304 static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
305 {
306 	unsigned int i;
307 
308 	if (si == NULL)
309 		return;
310 
311 	iput(si->si_inode);
312 	if (si->si_bh) {
313 		for (i = 0; i < si->si_blocks; i++) {
314 			if (si->si_bh[i]) {
315 				brelse(si->si_bh[i]);
316 				si->si_bh[i] = NULL;
317 			}
318 		}
319 		kfree(si->si_bh);
320 	}
321 
322 	kfree(si);
323 }
324 
325 int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
326 {
327 	struct ocfs2_slot_info *si = osb->slot_info;
328 
329 	if (si == NULL)
330 		return 0;
331 
332 	spin_lock(&osb->osb_lock);
333 	ocfs2_invalidate_slot(si, slot_num);
334 	spin_unlock(&osb->osb_lock);
335 
336 	return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
337 }
338 
339 static int ocfs2_validate_slot_map_block(struct super_block *sb,
340 					  struct buffer_head *bh)
341 {
342 	int rc;
343 
344 	BUG_ON(!buffer_uptodate(bh));
345 
346 	if (bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO) {
347 		rc = ocfs2_error(sb,
348 				 "Invalid Slot Map Buffer Head "
349 				 "Block Number : %llu, Should be >= %d",
350 				 (unsigned long long)bh->b_blocknr,
351 				 OCFS2_SUPER_BLOCK_BLKNO);
352 		return rc;
353 	}
354 	return 0;
355 }
356 
357 static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
358 				  struct ocfs2_slot_info *si)
359 {
360 	int status = 0;
361 	u64 blkno;
362 	unsigned long long blocks, bytes = 0;
363 	unsigned int i;
364 	struct buffer_head *bh;
365 
366 	status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
367 	if (status)
368 		goto bail;
369 
370 	blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
371 	BUG_ON(blocks > UINT_MAX);
372 	si->si_blocks = blocks;
373 	if (!si->si_blocks)
374 		goto bail;
375 
376 	if (si->si_extended)
377 		si->si_slots_per_block =
378 			(osb->sb->s_blocksize /
379 			 sizeof(struct ocfs2_extended_slot));
380 	else
381 		si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
382 
383 	/* The size checks above should ensure this */
384 	BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
385 
386 	trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
387 
388 	si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
389 			    GFP_KERNEL);
390 	if (!si->si_bh) {
391 		status = -ENOMEM;
392 		mlog_errno(status);
393 		goto bail;
394 	}
395 
396 	for (i = 0; i < si->si_blocks; i++) {
397 		status = ocfs2_extent_map_get_blocks(si->si_inode, i,
398 						     &blkno, NULL, NULL);
399 		if (status < 0) {
400 			mlog_errno(status);
401 			goto bail;
402 		}
403 
404 		trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
405 
406 		bh = NULL;  /* Acquire a fresh bh */
407 		status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
408 					   1, &bh, OCFS2_BH_IGNORE_CACHE,
409 					   ocfs2_validate_slot_map_block);
410 		if (status < 0) {
411 			mlog_errno(status);
412 			goto bail;
413 		}
414 
415 		si->si_bh[i] = bh;
416 	}
417 
418 bail:
419 	return status;
420 }
421 
422 int ocfs2_init_slot_info(struct ocfs2_super *osb)
423 {
424 	int status;
425 	struct inode *inode = NULL;
426 	struct ocfs2_slot_info *si;
427 
428 	si = kzalloc(struct_size(si, si_slots, osb->max_slots), GFP_KERNEL);
429 	if (!si) {
430 		status = -ENOMEM;
431 		mlog_errno(status);
432 		return status;
433 	}
434 
435 	si->si_extended = ocfs2_uses_extended_slot_map(osb);
436 	si->si_num_slots = osb->max_slots;
437 
438 	inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
439 					    OCFS2_INVALID_SLOT);
440 	if (!inode) {
441 		status = -EINVAL;
442 		mlog_errno(status);
443 		goto bail;
444 	}
445 
446 	si->si_inode = inode;
447 	status = ocfs2_map_slot_buffers(osb, si);
448 	if (status < 0) {
449 		mlog_errno(status);
450 		goto bail;
451 	}
452 
453 	osb->slot_info = (struct ocfs2_slot_info *)si;
454 bail:
455 	if (status < 0)
456 		__ocfs2_free_slot_info(si);
457 
458 	return status;
459 }
460 
461 void ocfs2_free_slot_info(struct ocfs2_super *osb)
462 {
463 	struct ocfs2_slot_info *si = osb->slot_info;
464 
465 	osb->slot_info = NULL;
466 	__ocfs2_free_slot_info(si);
467 }
468 
469 int ocfs2_find_slot(struct ocfs2_super *osb)
470 {
471 	int status;
472 	int slot;
473 	struct ocfs2_slot_info *si;
474 
475 	si = osb->slot_info;
476 
477 	spin_lock(&osb->osb_lock);
478 	ocfs2_update_slot_info(si);
479 
480 	/* search for ourselves first and take the slot if it already
481 	 * exists. Perhaps we need to mark this in a variable for our
482 	 * own journal recovery? Possibly not, though we certainly
483 	 * need to warn to the user */
484 	slot = __ocfs2_node_num_to_slot(si, osb->node_num);
485 	if (slot < 0) {
486 		/* if no slot yet, then just take 1st available
487 		 * one. */
488 		slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
489 		if (slot < 0) {
490 			spin_unlock(&osb->osb_lock);
491 			mlog(ML_ERROR, "no free slots available!\n");
492 			status = -EINVAL;
493 			goto bail;
494 		}
495 	} else
496 		printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
497 		       "allocated to this node!\n", slot, osb->dev_str);
498 
499 	ocfs2_set_slot(si, slot, osb->node_num);
500 	osb->slot_num = slot;
501 	spin_unlock(&osb->osb_lock);
502 
503 	trace_ocfs2_find_slot(osb->slot_num);
504 
505 	status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
506 	if (status < 0) {
507 		mlog_errno(status);
508 		/*
509 		 * if write block failed, invalidate slot to avoid overwrite
510 		 * slot during dismount in case another node rightly has mounted
511 		 */
512 		spin_lock(&osb->osb_lock);
513 		ocfs2_invalidate_slot(si, osb->slot_num);
514 		osb->slot_num = OCFS2_INVALID_SLOT;
515 		spin_unlock(&osb->osb_lock);
516 	}
517 
518 bail:
519 	return status;
520 }
521 
522 void ocfs2_put_slot(struct ocfs2_super *osb)
523 {
524 	int status, slot_num;
525 	struct ocfs2_slot_info *si = osb->slot_info;
526 
527 	if (!si)
528 		return;
529 
530 	spin_lock(&osb->osb_lock);
531 	ocfs2_update_slot_info(si);
532 
533 	slot_num = osb->slot_num;
534 	ocfs2_invalidate_slot(si, osb->slot_num);
535 	osb->slot_num = OCFS2_INVALID_SLOT;
536 	spin_unlock(&osb->osb_lock);
537 
538 	status = ocfs2_update_disk_slot(osb, si, slot_num);
539 	if (status < 0)
540 		mlog_errno(status);
541 
542 	ocfs2_free_slot_info(osb);
543 }
544