1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * slot_map.c
4 *
5 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
6 */
7
8 #include <linux/types.h>
9 #include <linux/slab.h>
10 #include <linux/highmem.h>
11
12 #include <cluster/masklog.h>
13
14 #include "ocfs2.h"
15
16 #include "dlmglue.h"
17 #include "extent_map.h"
18 #include "heartbeat.h"
19 #include "inode.h"
20 #include "slot_map.h"
21 #include "super.h"
22 #include "sysfile.h"
23 #include "ocfs2_trace.h"
24
25 #include "buffer_head_io.h"
26
27
/* In-memory state of a single entry of the slot map. */
struct ocfs2_slot {
	/* Non-zero while the slot is occupied by a node. */
	int sl_valid;
	/* Node number occupying the slot; readers check sl_valid first. */
	unsigned int sl_node_num;
};
32
33 struct ocfs2_slot_info {
34 int si_extended;
35 int si_slots_per_block;
36 struct inode *si_inode;
37 unsigned int si_blocks;
38 struct buffer_head **si_bh;
39 unsigned int si_num_slots;
40 struct ocfs2_slot si_slots[] __counted_by(si_num_slots);
41 };
42
43
/* Forward declarations of helpers that are defined later in this file. */
static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
				    unsigned int node_num);
static int ocfs2_validate_slot_map_block(struct super_block *sb,
					 struct buffer_head *bh);
49
ocfs2_invalidate_slot(struct ocfs2_slot_info * si,int slot_num)50 static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
51 int slot_num)
52 {
53 BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
54 si->si_slots[slot_num].sl_valid = 0;
55 }
56
ocfs2_set_slot(struct ocfs2_slot_info * si,int slot_num,unsigned int node_num)57 static void ocfs2_set_slot(struct ocfs2_slot_info *si,
58 int slot_num, unsigned int node_num)
59 {
60 BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
61
62 si->si_slots[slot_num].sl_valid = 1;
63 si->si_slots[slot_num].sl_node_num = node_num;
64 }
65
66 /* This version is for the extended slot map */
ocfs2_update_slot_info_extended(struct ocfs2_slot_info * si)67 static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
68 {
69 int b, i, slotno;
70 struct ocfs2_slot_map_extended *se;
71
72 slotno = 0;
73 for (b = 0; b < si->si_blocks; b++) {
74 se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
75 for (i = 0;
76 (i < si->si_slots_per_block) &&
77 (slotno < si->si_num_slots);
78 i++, slotno++) {
79 if (se->se_slots[i].es_valid)
80 ocfs2_set_slot(si, slotno,
81 le32_to_cpu(se->se_slots[i].es_node_num));
82 else
83 ocfs2_invalidate_slot(si, slotno);
84 }
85 }
86 }
87
88 /*
89 * Post the slot information on disk into our slot_info struct.
90 * Must be protected by osb_lock.
91 */
ocfs2_update_slot_info_old(struct ocfs2_slot_info * si)92 static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
93 {
94 int i;
95 struct ocfs2_slot_map *sm;
96
97 sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
98
99 for (i = 0; i < si->si_num_slots; i++) {
100 if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
101 ocfs2_invalidate_slot(si, i);
102 else
103 ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
104 }
105 }
106
ocfs2_update_slot_info(struct ocfs2_slot_info * si)107 static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
108 {
109 /*
110 * The slot data will have been refreshed when ocfs2_super_lock
111 * was taken.
112 */
113 if (si->si_extended)
114 ocfs2_update_slot_info_extended(si);
115 else
116 ocfs2_update_slot_info_old(si);
117 }
118
ocfs2_refresh_slot_info(struct ocfs2_super * osb)119 int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
120 {
121 int ret;
122 struct ocfs2_slot_info *si = osb->slot_info;
123
124 if (si == NULL)
125 return 0;
126
127 BUG_ON(si->si_blocks == 0);
128 BUG_ON(si->si_bh == NULL);
129
130 trace_ocfs2_refresh_slot_info(si->si_blocks);
131
132 /*
133 * We pass -1 as blocknr because we expect all of si->si_bh to
134 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If
135 * this is not true, the read of -1 (UINT64_MAX) will fail.
136 */
137 ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
138 si->si_bh, OCFS2_BH_IGNORE_CACHE,
139 ocfs2_validate_slot_map_block);
140 if (ret == 0) {
141 spin_lock(&osb->osb_lock);
142 ocfs2_update_slot_info(si);
143 spin_unlock(&osb->osb_lock);
144 }
145
146 return ret;
147 }
148
149 /* post the our slot info stuff into it's destination bh and write it
150 * out. */
ocfs2_update_disk_slot_extended(struct ocfs2_slot_info * si,int slot_num,struct buffer_head ** bh)151 static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
152 int slot_num,
153 struct buffer_head **bh)
154 {
155 int blkind = slot_num / si->si_slots_per_block;
156 int slotno = slot_num % si->si_slots_per_block;
157 struct ocfs2_slot_map_extended *se;
158
159 BUG_ON(blkind >= si->si_blocks);
160
161 se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
162 se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
163 if (si->si_slots[slot_num].sl_valid)
164 se->se_slots[slotno].es_node_num =
165 cpu_to_le32(si->si_slots[slot_num].sl_node_num);
166 *bh = si->si_bh[blkind];
167 }
168
ocfs2_update_disk_slot_old(struct ocfs2_slot_info * si,int slot_num,struct buffer_head ** bh)169 static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
170 int slot_num,
171 struct buffer_head **bh)
172 {
173 int i;
174 struct ocfs2_slot_map *sm;
175
176 sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
177 for (i = 0; i < si->si_num_slots; i++) {
178 if (si->si_slots[i].sl_valid)
179 sm->sm_slots[i] =
180 cpu_to_le16(si->si_slots[i].sl_node_num);
181 else
182 sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
183 }
184 *bh = si->si_bh[0];
185 }
186
ocfs2_update_disk_slot(struct ocfs2_super * osb,struct ocfs2_slot_info * si,int slot_num)187 static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
188 struct ocfs2_slot_info *si,
189 int slot_num)
190 {
191 int status;
192 struct buffer_head *bh;
193
194 spin_lock(&osb->osb_lock);
195 if (si->si_extended)
196 ocfs2_update_disk_slot_extended(si, slot_num, &bh);
197 else
198 ocfs2_update_disk_slot_old(si, slot_num, &bh);
199 spin_unlock(&osb->osb_lock);
200
201 status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
202 if (status < 0)
203 mlog_errno(status);
204
205 return status;
206 }
207
208 /*
209 * Calculate how many bytes are needed by the slot map. Returns
210 * an error if the slot map file is too small.
211 */
ocfs2_slot_map_physical_size(struct ocfs2_super * osb,struct inode * inode,unsigned long long * bytes)212 static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
213 struct inode *inode,
214 unsigned long long *bytes)
215 {
216 unsigned long long bytes_needed;
217
218 if (ocfs2_uses_extended_slot_map(osb)) {
219 bytes_needed = osb->max_slots *
220 sizeof(struct ocfs2_extended_slot);
221 } else {
222 bytes_needed = osb->max_slots * sizeof(__le16);
223 }
224 if (bytes_needed > i_size_read(inode)) {
225 mlog(ML_ERROR,
226 "Slot map file is too small! (size %llu, needed %llu)\n",
227 i_size_read(inode), bytes_needed);
228 return -ENOSPC;
229 }
230
231 *bytes = bytes_needed;
232 return 0;
233 }
234
235 /* try to find global node in the slot info. Returns -ENOENT
236 * if nothing is found. */
__ocfs2_node_num_to_slot(struct ocfs2_slot_info * si,unsigned int node_num)237 static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
238 unsigned int node_num)
239 {
240 int i, ret = -ENOENT;
241
242 for(i = 0; i < si->si_num_slots; i++) {
243 if (si->si_slots[i].sl_valid &&
244 (node_num == si->si_slots[i].sl_node_num)) {
245 ret = i;
246 break;
247 }
248 }
249
250 return ret;
251 }
252
__ocfs2_find_empty_slot(struct ocfs2_slot_info * si,int preferred)253 static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
254 int preferred)
255 {
256 int i, ret = -ENOSPC;
257
258 if ((preferred >= 0) && (preferred < si->si_num_slots)) {
259 if (!si->si_slots[preferred].sl_valid) {
260 ret = preferred;
261 goto out;
262 }
263 }
264
265 for(i = 0; i < si->si_num_slots; i++) {
266 if (!si->si_slots[i].sl_valid) {
267 ret = i;
268 break;
269 }
270 }
271 out:
272 return ret;
273 }
274
ocfs2_node_num_to_slot(struct ocfs2_super * osb,unsigned int node_num)275 int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
276 {
277 int slot;
278 struct ocfs2_slot_info *si = osb->slot_info;
279
280 spin_lock(&osb->osb_lock);
281 slot = __ocfs2_node_num_to_slot(si, node_num);
282 spin_unlock(&osb->osb_lock);
283
284 return slot;
285 }
286
ocfs2_slot_to_node_num_locked(struct ocfs2_super * osb,int slot_num,unsigned int * node_num)287 int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
288 unsigned int *node_num)
289 {
290 struct ocfs2_slot_info *si = osb->slot_info;
291
292 assert_spin_locked(&osb->osb_lock);
293
294 BUG_ON(slot_num < 0);
295 BUG_ON(slot_num >= osb->max_slots);
296
297 if (!si->si_slots[slot_num].sl_valid)
298 return -ENOENT;
299
300 *node_num = si->si_slots[slot_num].sl_node_num;
301 return 0;
302 }
303
__ocfs2_free_slot_info(struct ocfs2_slot_info * si)304 static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
305 {
306 unsigned int i;
307
308 if (si == NULL)
309 return;
310
311 iput(si->si_inode);
312 if (si->si_bh) {
313 for (i = 0; i < si->si_blocks; i++) {
314 if (si->si_bh[i]) {
315 brelse(si->si_bh[i]);
316 si->si_bh[i] = NULL;
317 }
318 }
319 kfree(si->si_bh);
320 }
321
322 kfree(si);
323 }
324
ocfs2_clear_slot(struct ocfs2_super * osb,int slot_num)325 int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
326 {
327 struct ocfs2_slot_info *si = osb->slot_info;
328
329 if (si == NULL)
330 return 0;
331
332 spin_lock(&osb->osb_lock);
333 ocfs2_invalidate_slot(si, slot_num);
334 spin_unlock(&osb->osb_lock);
335
336 return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
337 }
338
ocfs2_validate_slot_map_block(struct super_block * sb,struct buffer_head * bh)339 static int ocfs2_validate_slot_map_block(struct super_block *sb,
340 struct buffer_head *bh)
341 {
342 int rc;
343
344 BUG_ON(!buffer_uptodate(bh));
345
346 if (bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO) {
347 rc = ocfs2_error(sb,
348 "Invalid Slot Map Buffer Head "
349 "Block Number : %llu, Should be >= %d",
350 (unsigned long long)bh->b_blocknr,
351 OCFS2_SUPER_BLOCK_BLKNO);
352 return rc;
353 }
354 return 0;
355 }
356
ocfs2_map_slot_buffers(struct ocfs2_super * osb,struct ocfs2_slot_info * si)357 static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
358 struct ocfs2_slot_info *si)
359 {
360 int status = 0;
361 u64 blkno;
362 unsigned long long blocks, bytes = 0;
363 unsigned int i;
364 struct buffer_head *bh;
365
366 status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
367 if (status)
368 goto bail;
369
370 blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
371 BUG_ON(blocks > UINT_MAX);
372 si->si_blocks = blocks;
373 if (!si->si_blocks)
374 goto bail;
375
376 if (si->si_extended)
377 si->si_slots_per_block =
378 (osb->sb->s_blocksize /
379 sizeof(struct ocfs2_extended_slot));
380 else
381 si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
382
383 /* The size checks above should ensure this */
384 BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
385
386 trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
387
388 si->si_bh = kzalloc_objs(struct buffer_head *, si->si_blocks);
389 if (!si->si_bh) {
390 status = -ENOMEM;
391 mlog_errno(status);
392 goto bail;
393 }
394
395 for (i = 0; i < si->si_blocks; i++) {
396 status = ocfs2_extent_map_get_blocks(si->si_inode, i,
397 &blkno, NULL, NULL);
398 if (status < 0) {
399 mlog_errno(status);
400 goto bail;
401 }
402
403 trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
404
405 bh = NULL; /* Acquire a fresh bh */
406 status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
407 1, &bh, OCFS2_BH_IGNORE_CACHE,
408 ocfs2_validate_slot_map_block);
409 if (status < 0) {
410 mlog_errno(status);
411 goto bail;
412 }
413
414 si->si_bh[i] = bh;
415 }
416
417 bail:
418 return status;
419 }
420
ocfs2_init_slot_info(struct ocfs2_super * osb)421 int ocfs2_init_slot_info(struct ocfs2_super *osb)
422 {
423 int status;
424 struct inode *inode = NULL;
425 struct ocfs2_slot_info *si;
426
427 si = kzalloc_flex(*si, si_slots, osb->max_slots);
428 if (!si) {
429 status = -ENOMEM;
430 mlog_errno(status);
431 return status;
432 }
433
434 si->si_extended = ocfs2_uses_extended_slot_map(osb);
435 si->si_num_slots = osb->max_slots;
436
437 inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
438 OCFS2_INVALID_SLOT);
439 if (!inode) {
440 status = -EINVAL;
441 mlog_errno(status);
442 goto bail;
443 }
444
445 si->si_inode = inode;
446 status = ocfs2_map_slot_buffers(osb, si);
447 if (status < 0) {
448 mlog_errno(status);
449 goto bail;
450 }
451
452 osb->slot_info = (struct ocfs2_slot_info *)si;
453 bail:
454 if (status < 0)
455 __ocfs2_free_slot_info(si);
456
457 return status;
458 }
459
ocfs2_free_slot_info(struct ocfs2_super * osb)460 void ocfs2_free_slot_info(struct ocfs2_super *osb)
461 {
462 struct ocfs2_slot_info *si = osb->slot_info;
463
464 osb->slot_info = NULL;
465 __ocfs2_free_slot_info(si);
466 }
467
ocfs2_find_slot(struct ocfs2_super * osb)468 int ocfs2_find_slot(struct ocfs2_super *osb)
469 {
470 int status;
471 int slot;
472 struct ocfs2_slot_info *si;
473
474 si = osb->slot_info;
475
476 spin_lock(&osb->osb_lock);
477 ocfs2_update_slot_info(si);
478
479 /* search for ourselves first and take the slot if it already
480 * exists. Perhaps we need to mark this in a variable for our
481 * own journal recovery? Possibly not, though we certainly
482 * need to warn to the user */
483 slot = __ocfs2_node_num_to_slot(si, osb->node_num);
484 if (slot < 0) {
485 /* if no slot yet, then just take 1st available
486 * one. */
487 slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
488 if (slot < 0) {
489 spin_unlock(&osb->osb_lock);
490 mlog(ML_ERROR, "no free slots available!\n");
491 status = -EINVAL;
492 goto bail;
493 }
494 } else
495 printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
496 "allocated to this node!\n", slot, osb->dev_str);
497
498 ocfs2_set_slot(si, slot, osb->node_num);
499 osb->slot_num = slot;
500 spin_unlock(&osb->osb_lock);
501
502 trace_ocfs2_find_slot(osb->slot_num);
503
504 status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
505 if (status < 0) {
506 mlog_errno(status);
507 /*
508 * if write block failed, invalidate slot to avoid overwrite
509 * slot during dismount in case another node rightly has mounted
510 */
511 spin_lock(&osb->osb_lock);
512 ocfs2_invalidate_slot(si, osb->slot_num);
513 osb->slot_num = OCFS2_INVALID_SLOT;
514 spin_unlock(&osb->osb_lock);
515 }
516
517 bail:
518 return status;
519 }
520
ocfs2_put_slot(struct ocfs2_super * osb)521 void ocfs2_put_slot(struct ocfs2_super *osb)
522 {
523 int status, slot_num;
524 struct ocfs2_slot_info *si = osb->slot_info;
525
526 if (!si)
527 return;
528
529 spin_lock(&osb->osb_lock);
530 ocfs2_update_slot_info(si);
531
532 slot_num = osb->slot_num;
533 ocfs2_invalidate_slot(si, osb->slot_num);
534 osb->slot_num = OCFS2_INVALID_SLOT;
535 spin_unlock(&osb->osb_lock);
536
537 status = ocfs2_update_disk_slot(osb, si, slot_num);
538 if (status < 0)
539 mlog_errno(status);
540
541 ocfs2_free_slot_info(osb);
542 }
543