1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * NILFS disk address translation. 4 * 5 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Koji Sato. 8 */ 9 10 #include <linux/types.h> 11 #include <linux/buffer_head.h> 12 #include <linux/string.h> 13 #include <linux/errno.h> 14 #include "nilfs.h" 15 #include "mdt.h" 16 #include "alloc.h" 17 #include "dat.h" 18 19 20 #define NILFS_CNO_MIN ((__u64)1) 21 #define NILFS_CNO_MAX (~(__u64)0) 22 23 /** 24 * struct nilfs_dat_info - on-memory private data of DAT file 25 * @mi: on-memory private data of metadata file 26 * @palloc_cache: persistent object allocator cache of DAT file 27 * @shadow: shadow map of DAT file 28 */ 29 struct nilfs_dat_info { 30 struct nilfs_mdt_info mi; 31 struct nilfs_palloc_cache palloc_cache; 32 struct nilfs_shadow_map shadow; 33 }; 34 35 static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) 36 { 37 return (struct nilfs_dat_info *)NILFS_MDT(dat); 38 } 39 40 static int nilfs_dat_prepare_entry(struct inode *dat, 41 struct nilfs_palloc_req *req, int create) 42 { 43 int ret; 44 45 ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, 46 create, &req->pr_entry_bh); 47 if (unlikely(ret == -ENOENT)) { 48 nilfs_err(dat->i_sb, 49 "DAT doesn't have a block to manage vblocknr = %llu", 50 (unsigned long long)req->pr_entry_nr); 51 /* 52 * Return internal code -EINVAL to notify bmap layer of 53 * metadata corruption. 54 */ 55 ret = -EINVAL; 56 } 57 return ret; 58 } 59 60 static void nilfs_dat_commit_entry(struct inode *dat, 61 struct nilfs_palloc_req *req) 62 { 63 mark_buffer_dirty(req->pr_entry_bh); 64 nilfs_mdt_mark_dirty(dat); 65 brelse(req->pr_entry_bh); 66 } 67 68 static void nilfs_dat_abort_entry(struct inode *dat, 69 struct nilfs_palloc_req *req) 70 { 71 brelse(req->pr_entry_bh); 72 } 73 74 int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) 75 { 76 int ret; 77 78 ret = nilfs_palloc_prepare_alloc_entry(dat, req); 79 if (ret < 0) 80 return ret; 81 82 ret = nilfs_dat_prepare_entry(dat, req, 1); 83 if (ret < 0) 84 nilfs_palloc_abort_alloc_entry(dat, req); 85 86 return ret; 87 } 88 89 void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) 90 { 91 struct nilfs_dat_entry *entry; 92 void *kaddr; 93 94 kaddr = kmap_atomic(req->pr_entry_bh->b_page); 95 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 96 req->pr_entry_bh, kaddr); 97 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 98 entry->de_end = cpu_to_le64(NILFS_CNO_MAX); 99 entry->de_blocknr = cpu_to_le64(0); 100 kunmap_atomic(kaddr); 101 102 nilfs_palloc_commit_alloc_entry(dat, req); 103 nilfs_dat_commit_entry(dat, req); 104 } 105 106 void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) 107 { 108 nilfs_dat_abort_entry(dat, req); 109 nilfs_palloc_abort_alloc_entry(dat, req); 110 } 111 112 static void nilfs_dat_commit_free(struct inode *dat, 113 struct nilfs_palloc_req *req) 114 { 115 struct nilfs_dat_entry *entry; 116 void *kaddr; 117 118 kaddr = kmap_atomic(req->pr_entry_bh->b_page); 119 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 120 req->pr_entry_bh, kaddr); 121 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 122 entry->de_end = cpu_to_le64(NILFS_CNO_MIN); 123 entry->de_blocknr = cpu_to_le64(0); 124 kunmap_atomic(kaddr); 125 126 nilfs_dat_commit_entry(dat, req); 127 128 if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { 129 nilfs_error(dat->i_sb, 130 "state inconsistency probably due to duplicate use of vblocknr = %llu", 131 (unsigned long long)req->pr_entry_nr); 132 return; 133 } 134 nilfs_palloc_commit_free_entry(dat, req); 135 } 136 137 int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) 138 { 139 return nilfs_dat_prepare_entry(dat, req, 0); 140 } 141 142 void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, 143 sector_t blocknr) 144 { 145 struct nilfs_dat_entry *entry; 146 void *kaddr; 147 148 kaddr = kmap_atomic(req->pr_entry_bh->b_page); 149 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 150 req->pr_entry_bh, kaddr); 151 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); 152 entry->de_blocknr = cpu_to_le64(blocknr); 153 kunmap_atomic(kaddr); 154 155 nilfs_dat_commit_entry(dat, req); 156 } 157 158 int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) 159 { 160 struct nilfs_dat_entry *entry; 161 __u64 start; 162 sector_t blocknr; 163 void *kaddr; 164 int ret; 165 166 ret = nilfs_dat_prepare_entry(dat, req, 0); 167 if (ret < 0) 168 return ret; 169 170 kaddr = kmap_atomic(req->pr_entry_bh->b_page); 171 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 172 req->pr_entry_bh, kaddr); 173 start = le64_to_cpu(entry->de_start); 174 blocknr = le64_to_cpu(entry->de_blocknr); 175 kunmap_atomic(kaddr); 176 177 if (blocknr == 0) { 178 ret = nilfs_palloc_prepare_free_entry(dat, req); 179 if (ret < 0) { 180 nilfs_dat_abort_entry(dat, req); 181 return ret; 182 } 183 } 184 if (unlikely(start > nilfs_mdt_cno(dat))) { 185 nilfs_err(dat->i_sb, 186 "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)", 187 (unsigned long long)req->pr_entry_nr, 188 (unsigned long long)start, 189 (unsigned long long)nilfs_mdt_cno(dat)); 190 nilfs_dat_abort_entry(dat, req); 191 return -EINVAL; 192 } 193 194 return 0; 195 } 196 197 void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, 198 int dead) 199 { 200 struct nilfs_dat_entry *entry; 201 __u64 start, end; 202 sector_t blocknr; 203 void *kaddr; 204 205 kaddr = kmap_atomic(req->pr_entry_bh->b_page); 206 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 207 req->pr_entry_bh, kaddr); 208 end = start = le64_to_cpu(entry->de_start); 209 if (!dead) { 210 end = nilfs_mdt_cno(dat); 211 WARN_ON(start > end); 212 } 213 entry->de_end = cpu_to_le64(end); 214 blocknr = le64_to_cpu(entry->de_blocknr); 215 kunmap_atomic(kaddr); 216 217 if (blocknr == 0) 218 nilfs_dat_commit_free(dat, req); 219 else 220 nilfs_dat_commit_entry(dat, req); 221 } 222 223 void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) 224 { 225 struct nilfs_dat_entry *entry; 226 __u64 start; 227 sector_t blocknr; 228 void *kaddr; 229 230 kaddr = kmap_atomic(req->pr_entry_bh->b_page); 231 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 232 req->pr_entry_bh, kaddr); 233 start = le64_to_cpu(entry->de_start); 234 blocknr = le64_to_cpu(entry->de_blocknr); 235 kunmap_atomic(kaddr); 236 237 if (start == nilfs_mdt_cno(dat) && blocknr == 0) 238 nilfs_palloc_abort_free_entry(dat, req); 239 nilfs_dat_abort_entry(dat, req); 240 } 241 242 int nilfs_dat_prepare_update(struct inode *dat, 243 struct nilfs_palloc_req *oldreq, 244 struct nilfs_palloc_req *newreq) 245 { 246 int ret; 247 248 ret = nilfs_dat_prepare_end(dat, oldreq); 249 if (!ret) { 250 ret = nilfs_dat_prepare_alloc(dat, newreq); 251 if (ret < 0) 252 nilfs_dat_abort_end(dat, oldreq); 253 } 254 return ret; 255 } 256 257 void nilfs_dat_commit_update(struct inode *dat, 258 struct nilfs_palloc_req *oldreq, 259 struct nilfs_palloc_req *newreq, int dead) 260 { 261 nilfs_dat_commit_end(dat, oldreq, dead); 262 nilfs_dat_commit_alloc(dat, newreq); 263 } 264 265 void nilfs_dat_abort_update(struct inode *dat, 266 struct nilfs_palloc_req *oldreq, 267 struct nilfs_palloc_req *newreq) 268 { 269 nilfs_dat_abort_end(dat, oldreq); 270 nilfs_dat_abort_alloc(dat, newreq); 271 } 272 273 /** 274 * nilfs_dat_mark_dirty - 275 * @dat: DAT file inode 276 * @vblocknr: virtual block number 277 * 278 * Description: 279 * 280 * Return Value: On success, 0 is returned. On error, one of the following 281 * negative error codes is returned. 282 * 283 * %-EIO - I/O error. 284 * 285 * %-ENOMEM - Insufficient amount of memory available. 286 */ 287 int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) 288 { 289 struct nilfs_palloc_req req; 290 int ret; 291 292 req.pr_entry_nr = vblocknr; 293 ret = nilfs_dat_prepare_entry(dat, &req, 0); 294 if (ret == 0) 295 nilfs_dat_commit_entry(dat, &req); 296 return ret; 297 } 298 299 /** 300 * nilfs_dat_freev - free virtual block numbers 301 * @dat: DAT file inode 302 * @vblocknrs: array of virtual block numbers 303 * @nitems: number of virtual block numbers 304 * 305 * Description: nilfs_dat_freev() frees the virtual block numbers specified by 306 * @vblocknrs and @nitems. 307 * 308 * Return Value: On success, 0 is returned. On error, one of the following 309 * negative error codes is returned. 310 * 311 * %-EIO - I/O error. 312 * 313 * %-ENOMEM - Insufficient amount of memory available. 314 * 315 * %-ENOENT - The virtual block number have not been allocated. 316 */ 317 int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) 318 { 319 return nilfs_palloc_freev(dat, vblocknrs, nitems); 320 } 321 322 /** 323 * nilfs_dat_move - change a block number 324 * @dat: DAT file inode 325 * @vblocknr: virtual block number 326 * @blocknr: block number 327 * 328 * Description: nilfs_dat_move() changes the block number associated with 329 * @vblocknr to @blocknr. 330 * 331 * Return Value: On success, 0 is returned. On error, one of the following 332 * negative error codes is returned. 333 * 334 * %-EIO - I/O error. 335 * 336 * %-ENOMEM - Insufficient amount of memory available. 337 */ 338 int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) 339 { 340 struct buffer_head *entry_bh; 341 struct nilfs_dat_entry *entry; 342 void *kaddr; 343 int ret; 344 345 ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); 346 if (ret < 0) 347 return ret; 348 349 /* 350 * The given disk block number (blocknr) is not yet written to 351 * the device at this point. 352 * 353 * To prevent nilfs_dat_translate() from returning the 354 * uncommitted block number, this makes a copy of the entry 355 * buffer and redirects nilfs_dat_translate() to the copy. 356 */ 357 if (!buffer_nilfs_redirected(entry_bh)) { 358 ret = nilfs_mdt_freeze_buffer(dat, entry_bh); 359 if (ret) { 360 brelse(entry_bh); 361 return ret; 362 } 363 } 364 365 kaddr = kmap_atomic(entry_bh->b_page); 366 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 367 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { 368 nilfs_crit(dat->i_sb, 369 "%s: invalid vblocknr = %llu, [%llu, %llu)", 370 __func__, (unsigned long long)vblocknr, 371 (unsigned long long)le64_to_cpu(entry->de_start), 372 (unsigned long long)le64_to_cpu(entry->de_end)); 373 kunmap_atomic(kaddr); 374 brelse(entry_bh); 375 return -EINVAL; 376 } 377 WARN_ON(blocknr == 0); 378 entry->de_blocknr = cpu_to_le64(blocknr); 379 kunmap_atomic(kaddr); 380 381 mark_buffer_dirty(entry_bh); 382 nilfs_mdt_mark_dirty(dat); 383 384 brelse(entry_bh); 385 386 return 0; 387 } 388 389 /** 390 * nilfs_dat_translate - translate a virtual block number to a block number 391 * @dat: DAT file inode 392 * @vblocknr: virtual block number 393 * @blocknrp: pointer to a block number 394 * 395 * Description: nilfs_dat_translate() maps the virtual block number @vblocknr 396 * to the corresponding block number. 397 * 398 * Return Value: On success, 0 is returned and the block number associated 399 * with @vblocknr is stored in the place pointed by @blocknrp. On error, one 400 * of the following negative error codes is returned. 401 * 402 * %-EIO - I/O error. 403 * 404 * %-ENOMEM - Insufficient amount of memory available. 405 * 406 * %-ENOENT - A block number associated with @vblocknr does not exist. 407 */ 408 int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) 409 { 410 struct buffer_head *entry_bh, *bh; 411 struct nilfs_dat_entry *entry; 412 sector_t blocknr; 413 void *kaddr; 414 int ret; 415 416 ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); 417 if (ret < 0) 418 return ret; 419 420 if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) { 421 bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh); 422 if (bh) { 423 WARN_ON(!buffer_uptodate(bh)); 424 brelse(entry_bh); 425 entry_bh = bh; 426 } 427 } 428 429 kaddr = kmap_atomic(entry_bh->b_page); 430 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 431 blocknr = le64_to_cpu(entry->de_blocknr); 432 if (blocknr == 0) { 433 ret = -ENOENT; 434 goto out; 435 } 436 *blocknrp = blocknr; 437 438 out: 439 kunmap_atomic(kaddr); 440 brelse(entry_bh); 441 return ret; 442 } 443 444 ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, 445 size_t nvi) 446 { 447 struct buffer_head *entry_bh; 448 struct nilfs_dat_entry *entry; 449 struct nilfs_vinfo *vinfo = buf; 450 __u64 first, last; 451 void *kaddr; 452 unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; 453 int i, j, n, ret; 454 455 for (i = 0; i < nvi; i += n) { 456 ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr, 457 0, &entry_bh); 458 if (ret < 0) 459 return ret; 460 kaddr = kmap_atomic(entry_bh->b_page); 461 /* last virtual block number in this block */ 462 first = vinfo->vi_vblocknr; 463 do_div(first, entries_per_block); 464 first *= entries_per_block; 465 last = first + entries_per_block - 1; 466 for (j = i, n = 0; 467 j < nvi && vinfo->vi_vblocknr >= first && 468 vinfo->vi_vblocknr <= last; 469 j++, n++, vinfo = (void *)vinfo + visz) { 470 entry = nilfs_palloc_block_get_entry( 471 dat, vinfo->vi_vblocknr, entry_bh, kaddr); 472 vinfo->vi_start = le64_to_cpu(entry->de_start); 473 vinfo->vi_end = le64_to_cpu(entry->de_end); 474 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); 475 } 476 kunmap_atomic(kaddr); 477 brelse(entry_bh); 478 } 479 480 return nvi; 481 } 482 483 /** 484 * nilfs_dat_read - read or get dat inode 485 * @sb: super block instance 486 * @entry_size: size of a dat entry 487 * @raw_inode: on-disk dat inode 488 * @inodep: buffer to store the inode 489 */ 490 int nilfs_dat_read(struct super_block *sb, size_t entry_size, 491 struct nilfs_inode *raw_inode, struct inode **inodep) 492 { 493 static struct lock_class_key dat_lock_key; 494 struct inode *dat; 495 struct nilfs_dat_info *di; 496 int err; 497 498 if (entry_size > sb->s_blocksize) { 499 nilfs_err(sb, "too large DAT entry size: %zu bytes", 500 entry_size); 501 return -EINVAL; 502 } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { 503 nilfs_err(sb, "too small DAT entry size: %zu bytes", 504 entry_size); 505 return -EINVAL; 506 } 507 508 dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); 509 if (unlikely(!dat)) 510 return -ENOMEM; 511 if (!(dat->i_state & I_NEW)) 512 goto out; 513 514 err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di)); 515 if (err) 516 goto failed; 517 518 err = nilfs_palloc_init_blockgroup(dat, entry_size); 519 if (err) 520 goto failed; 521 522 di = NILFS_DAT_I(dat); 523 lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); 524 nilfs_palloc_setup_cache(dat, &di->palloc_cache); 525 err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); 526 if (err) 527 goto failed; 528 529 err = nilfs_read_inode_common(dat, raw_inode); 530 if (err) 531 goto failed; 532 533 unlock_new_inode(dat); 534 out: 535 *inodep = dat; 536 return 0; 537 failed: 538 iget_failed(dat); 539 return err; 540 } 541