// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS disk address translation.
 *
 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Koji Sato.
 */

#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/string.h>
#include <linux/errno.h>
#include "nilfs.h"
#include "mdt.h"
#include "alloc.h"
#include "dat.h"


#define NILFS_CNO_MIN	((__u64)1)
#define NILFS_CNO_MAX	(~(__u64)0)

/**
 * struct nilfs_dat_info - on-memory private data of DAT file
 * @mi: on-memory private data of metadata file
 * @palloc_cache: persistent object allocator cache of DAT file
 * @shadow: shadow map of DAT file
 */
struct nilfs_dat_info {
	struct nilfs_mdt_info mi;
	struct nilfs_palloc_cache palloc_cache;
	struct nilfs_shadow_map shadow;
};

static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
{
	return (struct nilfs_dat_info *)NILFS_MDT(dat);
}

static int nilfs_dat_prepare_entry(struct inode *dat,
				   struct nilfs_palloc_req *req, int create)
{
	int ret;

	ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
					   create, &req->pr_entry_bh);
	if (unlikely(ret == -ENOENT)) {
		nilfs_err(dat->i_sb,
			  "DAT doesn't have a block to manage vblocknr = %llu",
			  (unsigned long long)req->pr_entry_nr);
		/*
		 * Return internal code -EINVAL to notify bmap layer of
		 * metadata corruption.
		 */
		ret = -EINVAL;
	}
	return ret;
}

static void nilfs_dat_commit_entry(struct inode *dat,
				   struct nilfs_palloc_req *req)
{
	mark_buffer_dirty(req->pr_entry_bh);
	nilfs_mdt_mark_dirty(dat);
	brelse(req->pr_entry_bh);
}

static void nilfs_dat_abort_entry(struct inode *dat,
				  struct nilfs_palloc_req *req)
{
	brelse(req->pr_entry_bh);
}

int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	int ret;

	ret = nilfs_palloc_prepare_alloc_entry(dat, req, true);
	if (ret < 0)
		return ret;

	ret = nilfs_dat_prepare_entry(dat, req, 1);
	if (ret < 0)
		nilfs_palloc_abort_alloc_entry(dat, req);

	return ret;
}

void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	size_t offset;

	offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
					   req->pr_entry_bh);
	entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
	entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
	entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
	entry->de_blocknr = cpu_to_le64(0);
	kunmap_local(entry);

	nilfs_palloc_commit_alloc_entry(dat, req);
	nilfs_dat_commit_entry(dat, req);
}

void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	nilfs_dat_abort_entry(dat, req);
	nilfs_palloc_abort_alloc_entry(dat, req);
}

static void nilfs_dat_commit_free(struct inode *dat,
				  struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	size_t offset;

	offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
					   req->pr_entry_bh);
	entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
	entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
	entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
	entry->de_blocknr = cpu_to_le64(0);
	kunmap_local(entry);

	nilfs_dat_commit_entry(dat, req);

	if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) {
		nilfs_error(dat->i_sb,
			    "state inconsistency probably due to duplicate use of vblocknr = %llu",
			    (unsigned long long)req->pr_entry_nr);
		return;
	}
	nilfs_palloc_commit_free_entry(dat, req);
}
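
/**
 * nilfs_dat_prepare_start - prepare to record the start of a block's lifetime
 * @dat: DAT file inode
 * @req: DAT request with @req->pr_entry_nr set to the target virtual block
 *	 number
 *
 * This reads in the entry block that holds the DAT entry for
 * @req->pr_entry_nr so that nilfs_dat_commit_start() can later fill in the
 * start checkpoint number and block address.
 *
 * Return: 0 on success, or a negative error code on failure.
 */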
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
	return nilfs_dat_prepare_entry(dat, req, 0);
}

void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
			    sector_t blocknr)
{
	struct nilfs_dat_entry *entry;
	size_t offset;

	offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
					   req->pr_entry_bh);
	entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
	entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
	entry->de_blocknr = cpu_to_le64(blocknr);
	kunmap_local(entry);

	nilfs_dat_commit_entry(dat, req);
}
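
/**
 * nilfs_dat_prepare_end - prepare to record the end of a block's lifetime
 * @dat: DAT file inode
 * @req: DAT request with @req->pr_entry_nr set to the target virtual block
 *	 number
 *
 * This reads in the DAT entry for @req->pr_entry_nr and, if the entry has
 * no disk block assigned yet, additionally prepares to free the entry,
 * since ending such an entry releases the virtual block number itself.
 * An entry whose start checkpoint number is ahead of the current checkpoint
 * number is rejected as metadata corruption.
 *
 * Return: 0 on success, or the following negative error code on failure.
 * * %-EINVAL - Invalid DAT entry (internal code).
 * * %-EIO - I/O error (including metadata corruption).
 * * %-ENOMEM - Insufficient memory available.
 */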
int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	__u64 start;
	sector_t blocknr;
	size_t offset;
	int ret;

	ret = nilfs_dat_prepare_entry(dat, req, 0);
	if (ret < 0)
		return ret;

	offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
					   req->pr_entry_bh);
	entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
	start = le64_to_cpu(entry->de_start);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_local(entry);

	if (blocknr == 0) {
		ret = nilfs_palloc_prepare_free_entry(dat, req);
		if (ret < 0) {
			nilfs_dat_abort_entry(dat, req);
			return ret;
		}
	}
	if (unlikely(start > nilfs_mdt_cno(dat))) {
		nilfs_err(dat->i_sb,
			  "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)",
			  (unsigned long long)req->pr_entry_nr,
			  (unsigned long long)start,
			  (unsigned long long)nilfs_mdt_cno(dat));
		nilfs_dat_abort_entry(dat, req);
		return -EINVAL;
	}

	return 0;
}

void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
			  int dead)
{
	struct nilfs_dat_entry *entry;
	__u64 start, end;
	sector_t blocknr;
	size_t offset;

	offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
					   req->pr_entry_bh);
	entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
	end = start = le64_to_cpu(entry->de_start);
	if (!dead) {
		end = nilfs_mdt_cno(dat);
		WARN_ON(start > end);
	}
	entry->de_end = cpu_to_le64(end);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_local(entry);

	if (blocknr == 0)
		nilfs_dat_commit_free(dat, req);
	else
		nilfs_dat_commit_entry(dat, req);
}

void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	__u64 start;
	sector_t blocknr;
	size_t offset;

	offset = nilfs_palloc_entry_offset(dat, req->pr_entry_nr,
					   req->pr_entry_bh);
	entry = kmap_local_folio(req->pr_entry_bh->b_folio, offset);
	start = le64_to_cpu(entry->de_start);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_local(entry);

	if (start == nilfs_mdt_cno(dat) && blocknr == 0)
		nilfs_palloc_abort_free_entry(dat, req);
	nilfs_dat_abort_entry(dat, req);
}
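
/**
 * nilfs_dat_prepare_update - prepare to reassign a virtual block number
 * @dat: DAT file inode
 * @oldreq: request for the entry whose lifetime ends
 * @newreq: request for the entry to be newly allocated
 *
 * This prepares both halves of an update in one step: ending the old DAT
 * entry and allocating a new one.  If allocation of the new entry fails,
 * the prepared end of the old entry is rolled back.
 *
 * Return: 0 on success, or a negative error code on failure.
 */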
int nilfs_dat_prepare_update(struct inode *dat,
			     struct nilfs_palloc_req *oldreq,
			     struct nilfs_palloc_req *newreq)
{
	int ret;

	ret = nilfs_dat_prepare_end(dat, oldreq);
	if (!ret) {
		ret = nilfs_dat_prepare_alloc(dat, newreq);
		if (ret < 0)
			nilfs_dat_abort_end(dat, oldreq);
	}
	return ret;
}

void nilfs_dat_commit_update(struct inode *dat,
			     struct nilfs_palloc_req *oldreq,
			     struct nilfs_palloc_req *newreq, int dead)
{
	nilfs_dat_commit_end(dat, oldreq, dead);
	nilfs_dat_commit_alloc(dat, newreq);
}

void nilfs_dat_abort_update(struct inode *dat,
			    struct nilfs_palloc_req *oldreq,
			    struct nilfs_palloc_req *newreq)
{
	nilfs_dat_abort_end(dat, oldreq);
	nilfs_dat_abort_alloc(dat, newreq);
}

/**
 * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified
 *			  virtual block address entry as dirty
 * @dat: DAT file inode
 * @vblocknr: virtual block number
 *
 * Return: 0 on success, or the following negative error code on failure.
 * * %-EINVAL - Invalid DAT entry (internal code).
 * * %-EIO - I/O error (including metadata corruption).
 * * %-ENOMEM - Insufficient memory available.
 */
int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr)
{
	struct nilfs_palloc_req req;
	int ret;

	req.pr_entry_nr = vblocknr;
	ret = nilfs_dat_prepare_entry(dat, &req, 0);
	if (ret == 0)
		nilfs_dat_commit_entry(dat, &req);
	return ret;
}

/**
 * nilfs_dat_freev - free virtual block numbers
 * @dat: DAT file inode
 * @vblocknrs: array of virtual block numbers
 * @nitems: number of virtual block numbers
 *
 * Description: nilfs_dat_freev() frees the virtual block numbers specified by
 * @vblocknrs and @nitems.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - The virtual block number has not been allocated.
 */
int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems)
{
	return nilfs_palloc_freev(dat, vblocknrs, nitems);
}

/**
 * nilfs_dat_move - change a block number
 * @dat: DAT file inode
 * @vblocknr: virtual block number
 * @blocknr: block number
 *
 * Description: nilfs_dat_move() changes the block number associated with
 * @vblocknr to @blocknr.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
{
	struct buffer_head *entry_bh;
	struct nilfs_dat_entry *entry;
	size_t offset;
	int ret;

	ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
	if (ret < 0)
		return ret;

	/*
	 * The given disk block number (blocknr) is not yet written to
	 * the device at this point.
	 *
	 * To prevent nilfs_dat_translate() from returning the
	 * uncommitted block number, this makes a copy of the entry
	 * buffer and redirects nilfs_dat_translate() to the copy.
	 */
	if (!buffer_nilfs_redirected(entry_bh)) {
		ret = nilfs_mdt_freeze_buffer(dat, entry_bh);
		if (ret) {
			brelse(entry_bh);
			return ret;
		}
	}

	offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh);
	entry = kmap_local_folio(entry_bh->b_folio, offset);
	if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
		nilfs_crit(dat->i_sb,
			   "%s: invalid vblocknr = %llu, [%llu, %llu)",
			   __func__, (unsigned long long)vblocknr,
			   (unsigned long long)le64_to_cpu(entry->de_start),
			   (unsigned long long)le64_to_cpu(entry->de_end));
		kunmap_local(entry);
		brelse(entry_bh);
		return -EINVAL;
	}
	WARN_ON(blocknr == 0);
	entry->de_blocknr = cpu_to_le64(blocknr);
	kunmap_local(entry);

	mark_buffer_dirty(entry_bh);
	nilfs_mdt_mark_dirty(dat);

	brelse(entry_bh);

	return 0;
}

/**
 * nilfs_dat_translate - translate a virtual block number to a block number
 * @dat: DAT file inode
 * @vblocknr: virtual block number
 * @blocknrp: pointer to a block number
 *
 * Description: nilfs_dat_translate() maps the virtual block number @vblocknr
 * to the corresponding block number.
 *
 * Return Value: On success, 0 is returned and the block number associated
 * with @vblocknr is stored in the place pointed by @blocknrp. On error, one
 * of the following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - A block number associated with @vblocknr does not exist.
 */
int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
{
	struct buffer_head *entry_bh, *bh;
	struct nilfs_dat_entry *entry;
	sector_t blocknr;
	size_t offset;
	int ret;

	ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
	if (ret < 0)
		return ret;

	if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) {
		bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh);
		if (bh) {
			WARN_ON(!buffer_uptodate(bh));
			brelse(entry_bh);
			entry_bh = bh;
		}
	}

	offset = nilfs_palloc_entry_offset(dat, vblocknr, entry_bh);
	entry = kmap_local_folio(entry_bh->b_folio, offset);
	blocknr = le64_to_cpu(entry->de_blocknr);
	if (blocknr == 0) {
		ret = -ENOENT;
		goto out;
	}
	*blocknrp = blocknr;

 out:
	kunmap_local(entry);
	brelse(entry_bh);
	return ret;
}
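
/**
 * nilfs_dat_get_vinfo - get information on virtual block addresses
 * @dat: DAT file inode
 * @buf: array of nilfs_vinfo structures to fill in
 * @visz: size of one element of @buf (the stride used to walk the array)
 * @nvi: number of elements in @buf
 *
 * For each element of @buf, this looks up the DAT entry designated by the
 * element's vi_vblocknr field and copies back its lifetime (start and end
 * checkpoint numbers) and block address.  Consecutive elements that fall
 * into the same DAT entry block are handled in one batch so that the block
 * is read only once.
 *
 * Return: count of items stored in @buf (i.e. @nvi) on success, or a
 * negative error code on failure.
 */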
ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz,
			    size_t nvi)
{
	struct buffer_head *entry_bh;
	struct nilfs_dat_entry *entry, *first_entry;
	struct nilfs_vinfo *vinfo = buf;
	__u64 first, last;
	size_t offset;
	unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
	unsigned int entry_size = NILFS_MDT(dat)->mi_entry_size;
	int i, j, n, ret;

	for (i = 0; i < nvi; i += n) {
		ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr,
						   0, &entry_bh);
		if (ret < 0)
			return ret;

		first = vinfo->vi_vblocknr;
		first = div64_ul(first, entries_per_block);
		first *= entries_per_block;
		/* first virtual block number in this block */

		last = first + entries_per_block - 1;
		/* last virtual block number in this block */

		offset = nilfs_palloc_entry_offset(dat, first, entry_bh);
		first_entry = kmap_local_folio(entry_bh->b_folio, offset);
		for (j = i, n = 0;
		     j < nvi && vinfo->vi_vblocknr >= first &&
			     vinfo->vi_vblocknr <= last;
		     j++, n++, vinfo = (void *)vinfo + visz) {
			entry = (void *)first_entry +
				(vinfo->vi_vblocknr - first) * entry_size;
			vinfo->vi_start = le64_to_cpu(entry->de_start);
			vinfo->vi_end = le64_to_cpu(entry->de_end);
			vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
		}
		kunmap_local(first_entry);
		brelse(entry_bh);
	}

	return nvi;
}

/**
 * nilfs_dat_read - read or get dat inode
 * @sb: super block instance
 * @entry_size: size of a dat entry
 * @raw_inode: on-disk dat inode
 * @inodep: buffer to store the inode
 *
 * Return: 0 on success, or a negative error code on failure.
 */
int nilfs_dat_read(struct super_block *sb, size_t entry_size,
		   struct nilfs_inode *raw_inode, struct inode **inodep)
{
	static struct lock_class_key dat_lock_key;
	struct inode *dat;
	struct nilfs_dat_info *di;
	int err;

	if (entry_size > sb->s_blocksize) {
		nilfs_err(sb, "too large DAT entry size: %zu bytes",
			  entry_size);
		return -EINVAL;
	} else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) {
		nilfs_err(sb, "too small DAT entry size: %zu bytes",
			  entry_size);
		return -EINVAL;
	}

	dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO);
	if (unlikely(!dat))
		return -ENOMEM;
	if (!(dat->i_state & I_NEW))
		goto out;

	err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di));
	if (err)
		goto failed;

	err = nilfs_palloc_init_blockgroup(dat, entry_size);
	if (err)
		goto failed;

	di = NILFS_DAT_I(dat);
	lockdep_set_class(&di->mi.mi_sem, &dat_lock_key);
	nilfs_palloc_setup_cache(dat, &di->palloc_cache);
	err = nilfs_mdt_setup_shadow_map(dat, &di->shadow);
	if (err)
		goto failed;

	err = nilfs_read_inode_common(dat, raw_inode);
	if (err)
		goto failed;

	unlock_new_inode(dat);
 out:
	*inodep = dat;
	return 0;
 failed:
	iget_failed(dat);
	return err;
}