1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * NILFS disk address translation. 4 * 5 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Koji Sato. 8 */ 9 10 #include <linux/types.h> 11 #include <linux/buffer_head.h> 12 #include <linux/string.h> 13 #include <linux/errno.h> 14 #include "nilfs.h" 15 #include "mdt.h" 16 #include "alloc.h" 17 #include "dat.h" 18 19 20 #define NILFS_CNO_MIN ((__u64)1) 21 #define NILFS_CNO_MAX (~(__u64)0) 22 23 /** 24 * struct nilfs_dat_info - on-memory private data of DAT file 25 * @mi: on-memory private data of metadata file 26 * @palloc_cache: persistent object allocator cache of DAT file 27 * @shadow: shadow map of DAT file 28 */ 29 struct nilfs_dat_info { 30 struct nilfs_mdt_info mi; 31 struct nilfs_palloc_cache palloc_cache; 32 struct nilfs_shadow_map shadow; 33 }; 34 35 static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) 36 { 37 return (struct nilfs_dat_info *)NILFS_MDT(dat); 38 } 39 40 static int nilfs_dat_prepare_entry(struct inode *dat, 41 struct nilfs_palloc_req *req, int create) 42 { 43 int ret; 44 45 ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, 46 create, &req->pr_entry_bh); 47 if (unlikely(ret == -ENOENT)) { 48 nilfs_err(dat->i_sb, 49 "DAT doesn't have a block to manage vblocknr = %llu", 50 (unsigned long long)req->pr_entry_nr); 51 /* 52 * Return internal code -EINVAL to notify bmap layer of 53 * metadata corruption. 54 */ 55 ret = -EINVAL; 56 } 57 return ret; 58 } 59 60 static void nilfs_dat_commit_entry(struct inode *dat, 61 struct nilfs_palloc_req *req) 62 { 63 mark_buffer_dirty(req->pr_entry_bh); 64 nilfs_mdt_mark_dirty(dat); 65 brelse(req->pr_entry_bh); 66 } 67 68 static void nilfs_dat_abort_entry(struct inode *dat, 69 struct nilfs_palloc_req *req) 70 { 71 brelse(req->pr_entry_bh); 72 } 73 74 int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) 75 { 76 int ret; 77 78 ret = nilfs_palloc_prepare_alloc_entry(dat, req, true); 79 if (ret < 0) 80 return ret; 81 82 ret = nilfs_dat_prepare_entry(dat, req, 1); 83 if (ret < 0) 84 nilfs_palloc_abort_alloc_entry(dat, req); 85 86 return ret; 87 } 88 89 void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) 90 { 91 struct nilfs_dat_entry *entry; 92 size_t offset; 93 94 offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, 95 req->pr_entry_bh); 96 entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); 97 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 98 entry->de_end = cpu_to_le64(NILFS_CNO_MAX); 99 entry->de_blocknr = cpu_to_le64(0); 100 kunmap_local(entry); 101 102 nilfs_palloc_commit_alloc_entry(dat, req); 103 nilfs_dat_commit_entry(dat, req); 104 } 105 106 void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) 107 { 108 nilfs_dat_abort_entry(dat, req); 109 nilfs_palloc_abort_alloc_entry(dat, req); 110 } 111 112 static void nilfs_dat_commit_free(struct inode *dat, 113 struct nilfs_palloc_req *req) 114 { 115 struct nilfs_dat_entry *entry; 116 size_t offset; 117 118 offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, 119 req->pr_entry_bh); 120 entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); 121 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 122 entry->de_end = cpu_to_le64(NILFS_CNO_MIN); 123 entry->de_blocknr = cpu_to_le64(0); 124 kunmap_local(entry); 125 126 nilfs_dat_commit_entry(dat, req); 127 128 if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { 129 nilfs_error(dat->i_sb, 130 "state inconsistency probably due to duplicate use of vblocknr = %llu", 131 (unsigned long long)req->pr_entry_nr); 132 return; 133 } 134 nilfs_palloc_commit_free_entry(dat, req); 135 } 136 137 int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) 138 { 139 return nilfs_dat_prepare_entry(dat, req, 0); 140 } 141 142 void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, 143 sector_t blocknr) 144 { 145 struct nilfs_dat_entry *entry; 146 size_t offset; 147 148 offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, 149 req->pr_entry_bh); 150 entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); 151 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); 152 entry->de_blocknr = cpu_to_le64(blocknr); 153 kunmap_local(entry); 154 155 nilfs_dat_commit_entry(dat, req); 156 } 157 158 int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) 159 { 160 struct nilfs_dat_entry *entry; 161 __u64 start; 162 sector_t blocknr; 163 size_t offset; 164 int ret; 165 166 ret = nilfs_dat_prepare_entry(dat, req, 0); 167 if (ret < 0) 168 return ret; 169 170 offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, 171 req->pr_entry_bh); 172 entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); 173 start = le64_to_cpu(entry->de_start); 174 blocknr = le64_to_cpu(entry->de_blocknr); 175 kunmap_local(entry); 176 177 if (blocknr == 0) { 178 ret = nilfs_palloc_prepare_free_entry(dat, req); 179 if (ret < 0) { 180 nilfs_dat_abort_entry(dat, req); 181 return ret; 182 } 183 } 184 if (unlikely(start > nilfs_mdt_cno(dat))) { 185 nilfs_err(dat->i_sb, 186 "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)", 187 (unsigned long long)req->pr_entry_nr, 188 (unsigned long long)start, 189 (unsigned long long)nilfs_mdt_cno(dat)); 190 nilfs_dat_abort_entry(dat, req); 191 return -EINVAL; 192 } 193 194 return 0; 195 } 196 197 void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, 198 int dead) 199 { 200 struct nilfs_dat_entry *entry; 201 __u64 start, end; 202 sector_t blocknr; 203 size_t offset; 204 205 offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, 206 req->pr_entry_bh); 207 entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); 208 end = start = le64_to_cpu(entry->de_start); 209 if (!dead) { 210 end = nilfs_mdt_cno(dat); 211 WARN_ON(start > end); 212 } 213 entry->de_end = cpu_to_le64(end); 214 blocknr = le64_to_cpu(entry->de_blocknr); 215 kunmap_local(entry); 216 217 if (blocknr == 0) 218 nilfs_dat_commit_free(dat, req); 219 else 220 nilfs_dat_commit_entry(dat, req); 221 } 222 223 void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) 224 { 225 struct nilfs_dat_entry *entry; 226 __u64 start; 227 sector_t blocknr; 228 size_t offset; 229 230 offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr, 231 req->pr_entry_bh); 232 entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset); 233 start = le64_to_cpu(entry->de_start); 234 blocknr = le64_to_cpu(entry->de_blocknr); 235 kunmap_local(entry); 236 237 if (start == nilfs_mdt_cno(dat) && blocknr == 0) 238 nilfs_palloc_abort_free_entry(dat, req); 239 nilfs_dat_abort_entry(dat, req); 240 } 241 242 int nilfs_dat_prepare_update(struct inode *dat, 243 struct nilfs_palloc_req *oldreq, 244 struct nilfs_palloc_req *newreq) 245 { 246 int ret; 247 248 ret = nilfs_dat_prepare_end(dat, oldreq); 249 if (!ret) { 250 ret = nilfs_dat_prepare_alloc(dat, newreq); 251 if (ret < 0) 252 nilfs_dat_abort_end(dat, oldreq); 253 } 254 return ret; 255 } 256 257 void nilfs_dat_commit_update(struct inode *dat, 258 struct nilfs_palloc_req *oldreq, 259 struct nilfs_palloc_req *newreq, int dead) 260 { 261 nilfs_dat_commit_end(dat, oldreq, dead); 262 nilfs_dat_commit_alloc(dat, newreq); 263 } 264 265 void nilfs_dat_abort_update(struct inode *dat, 266 struct nilfs_palloc_req *oldreq, 267 struct nilfs_palloc_req *newreq) 268 { 269 nilfs_dat_abort_end(dat, oldreq); 270 nilfs_dat_abort_alloc(dat, newreq); 271 } 272 273 /** 274 * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified 275 * virtual block address entry as dirty 276 * @dat: DAT file inode 277 * @vblocknr: virtual block number 278 * 279 * Return: 0 on success, or one of the following negative error codes on 280 * failure: 281 * * %-EINVAL - Invalid DAT entry (internal code). 282 * * %-EIO - I/O error (including metadata corruption). 283 * * %-ENOMEM - Insufficient memory available. 284 */ 285 int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) 286 { 287 struct nilfs_palloc_req req; 288 int ret; 289 290 req.pr_entry_nr = vblocknr; 291 ret = nilfs_dat_prepare_entry(dat, &req, 0); 292 if (ret == 0) 293 nilfs_dat_commit_entry(dat, &req); 294 return ret; 295 } 296 297 /** 298 * nilfs_dat_freev - free virtual block numbers 299 * @dat: DAT file inode 300 * @vblocknrs: array of virtual block numbers 301 * @nitems: number of virtual block numbers 302 * 303 * Description: nilfs_dat_freev() frees the virtual block numbers specified by 304 * @vblocknrs and @nitems. 305 * 306 * Return: 0 on success, or one of the following negative error codes on 307 * failure: 308 * * %-EIO - I/O error (including metadata corruption). 309 * * %-ENOENT - The virtual block number have not been allocated. 310 * * %-ENOMEM - Insufficient memory available. 311 */ 312 int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) 313 { 314 return nilfs_palloc_freev(dat, vblocknrs, nitems); 315 } 316 317 /** 318 * nilfs_dat_move - change a block number 319 * @dat: DAT file inode 320 * @vblocknr: virtual block number 321 * @blocknr: block number 322 * 323 * Description: nilfs_dat_move() changes the block number associated with 324 * @vblocknr to @blocknr. 325 * 326 * Return: 0 on success, or one of the following negative error codes on 327 * failure: 328 * * %-EIO - I/O error (including metadata corruption). 329 * * %-ENOMEM - Insufficient memory available. 330 */ 331 int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) 332 { 333 struct buffer_head *entry_bh; 334 struct nilfs_dat_entry *entry; 335 size_t offset; 336 int ret; 337 338 ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); 339 if (ret < 0) 340 return ret; 341 342 /* 343 * The given disk block number (blocknr) is not yet written to 344 * the device at this point. 345 * 346 * To prevent nilfs_dat_translate() from returning the 347 * uncommitted block number, this makes a copy of the entry 348 * buffer and redirects nilfs_dat_translate() to the copy. 349 */ 350 if (!buffer_nilfs_redirected(entry_bh)) { 351 ret = nilfs_mdt_freeze_buffer(dat, entry_bh); 352 if (ret) { 353 brelse(entry_bh); 354 return ret; 355 } 356 } 357 358 offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh); 359 entry = kmap_local_folio(entry_bh->b_folio, offset); 360 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { 361 nilfs_crit(dat->i_sb, 362 "%s: invalid vblocknr = %llu, [%llu, %llu)", 363 __func__, (unsigned long long)vblocknr, 364 (unsigned long long)le64_to_cpu(entry->de_start), 365 (unsigned long long)le64_to_cpu(entry->de_end)); 366 kunmap_local(entry); 367 brelse(entry_bh); 368 return -EINVAL; 369 } 370 WARN_ON(blocknr == 0); 371 entry->de_blocknr = cpu_to_le64(blocknr); 372 kunmap_local(entry); 373 374 mark_buffer_dirty(entry_bh); 375 nilfs_mdt_mark_dirty(dat); 376 377 brelse(entry_bh); 378 379 return 0; 380 } 381 382 /** 383 * nilfs_dat_translate - translate a virtual block number to a block number 384 * @dat: DAT file inode 385 * @vblocknr: virtual block number 386 * @blocknrp: pointer to a block number 387 * 388 * Description: nilfs_dat_translate() maps the virtual block number @vblocknr 389 * to the corresponding block number. The block number associated with 390 * @vblocknr is stored in the place pointed to by @blocknrp. 391 * 392 * Return: 0 on success, or one of the following negative error codes on 393 * failure: 394 * * %-EIO - I/O error (including metadata corruption). 395 * * %-ENOENT - A block number associated with @vblocknr does not exist. 396 * * %-ENOMEM - Insufficient memory available. 397 */ 398 int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) 399 { 400 struct buffer_head *entry_bh, *bh; 401 struct nilfs_dat_entry *entry; 402 sector_t blocknr; 403 size_t offset; 404 int ret; 405 406 ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); 407 if (ret < 0) 408 return ret; 409 410 if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) { 411 bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh); 412 if (bh) { 413 WARN_ON(!buffer_uptodate(bh)); 414 brelse(entry_bh); 415 entry_bh = bh; 416 } 417 } 418 419 offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh); 420 entry = kmap_local_folio(entry_bh->b_folio, offset); 421 blocknr = le64_to_cpu(entry->de_blocknr); 422 if (blocknr == 0) { 423 ret = -ENOENT; 424 goto out; 425 } 426 *blocknrp = blocknr; 427 428 out: 429 kunmap_local(entry); 430 brelse(entry_bh); 431 return ret; 432 } 433 434 ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, 435 size_t nvi) 436 { 437 struct buffer_head *entry_bh; 438 struct nilfs_dat_entry *entry, *first_entry; 439 struct nilfs_vinfo *vinfo = buf; 440 __u64 first, last; 441 size_t offset; 442 unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; 443 unsigned int entry_size = NILFS_MDT(dat)->mi_entry_size; 444 int i, j, n, ret; 445 446 for (i = 0; i < nvi; i += n) { 447 ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr, 448 0, &entry_bh); 449 if (ret < 0) 450 return ret; 451 452 first = vinfo->vi_vblocknr; 453 first = div64_ul(first, entries_per_block); 454 first *= entries_per_block; 455 /* first virtual block number in this block */ 456 457 last = first + entries_per_block - 1; 458 /* last virtual block number in this block */ 459 460 offset = nilfs_palloc_entry_offset(dat, first, entry_bh); 461 first_entry = kmap_local_folio(entry_bh->b_folio, offset); 462 for (j = i, n = 0; 463 j < nvi && vinfo->vi_vblocknr >= first && 464 vinfo->vi_vblocknr <= last; 465 j++, n++, vinfo = (void *)vinfo + visz) { 466 entry = (void *)first_entry + 467 (vinfo->vi_vblocknr - first) * entry_size; 468 vinfo->vi_start = le64_to_cpu(entry->de_start); 469 vinfo->vi_end = le64_to_cpu(entry->de_end); 470 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); 471 } 472 kunmap_local(first_entry); 473 brelse(entry_bh); 474 } 475 476 return nvi; 477 } 478 479 /** 480 * nilfs_dat_read - read or get dat inode 481 * @sb: super block instance 482 * @entry_size: size of a dat entry 483 * @raw_inode: on-disk dat inode 484 * @inodep: buffer to store the inode 485 * 486 * Return: 0 on success, or a negative error code on failure. 487 */ 488 int nilfs_dat_read(struct super_block *sb, size_t entry_size, 489 struct nilfs_inode *raw_inode, struct inode **inodep) 490 { 491 static struct lock_class_key dat_lock_key; 492 struct inode *dat; 493 struct nilfs_dat_info *di; 494 int err; 495 496 if (entry_size > sb->s_blocksize) { 497 nilfs_err(sb, "too large DAT entry size: %zu bytes", 498 entry_size); 499 return -EINVAL; 500 } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { 501 nilfs_err(sb, "too small DAT entry size: %zu bytes", 502 entry_size); 503 return -EINVAL; 504 } 505 506 dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); 507 if (unlikely(!dat)) 508 return -ENOMEM; 509 if (!(dat->i_state & I_NEW)) 510 goto out; 511 512 err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di)); 513 if (err) 514 goto failed; 515 516 err = nilfs_palloc_init_blockgroup(dat, entry_size); 517 if (err) 518 goto failed; 519 520 di = NILFS_DAT_I(dat); 521 lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); 522 nilfs_palloc_setup_cache(dat, &di->palloc_cache); 523 err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); 524 if (err) 525 goto failed; 526 527 err = nilfs_read_inode_common(dat, raw_inode); 528 if (err) 529 goto failed; 530 531 unlock_new_inode(dat); 532 out: 533 *inodep = dat; 534 return 0; 535 failed: 536 iget_failed(dat); 537 return err; 538 } 539