1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * NILFS disk address translation. 4 * 5 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Koji Sato. 8 */ 9 10 #include <linux/types.h> 11 #include <linux/buffer_head.h> 12 #include <linux/string.h> 13 #include <linux/errno.h> 14 #include "nilfs.h" 15 #include "mdt.h" 16 #include "alloc.h" 17 #include "dat.h" 18 19 20 #define NILFS_CNO_MIN ((__u64)1) 21 #define NILFS_CNO_MAX (~(__u64)0) 22 23 /** 24 * struct nilfs_dat_info - on-memory private data of DAT file 25 * @mi: on-memory private data of metadata file 26 * @palloc_cache: persistent object allocator cache of DAT file 27 * @shadow: shadow map of DAT file 28 */ 29 struct nilfs_dat_info { 30 struct nilfs_mdt_info mi; 31 struct nilfs_palloc_cache palloc_cache; 32 struct nilfs_shadow_map shadow; 33 }; 34 35 static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) 36 { 37 return (struct nilfs_dat_info *)NILFS_MDT(dat); 38 } 39 40 static int nilfs_dat_prepare_entry(struct inode *dat, 41 struct nilfs_palloc_req *req, int create) 42 { 43 int ret; 44 45 ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, 46 create, &req->pr_entry_bh); 47 if (unlikely(ret == -ENOENT)) { 48 nilfs_err(dat->i_sb, 49 "DAT doesn't have a block to manage vblocknr = %llu", 50 (unsigned long long)req->pr_entry_nr); 51 /* 52 * Return internal code -EINVAL to notify bmap layer of 53 * metadata corruption. 54 */ 55 ret = -EINVAL; 56 } 57 return ret; 58 } 59 60 static void nilfs_dat_commit_entry(struct inode *dat, 61 struct nilfs_palloc_req *req) 62 { 63 mark_buffer_dirty(req->pr_entry_bh); 64 nilfs_mdt_mark_dirty(dat); 65 brelse(req->pr_entry_bh); 66 } 67 68 static void nilfs_dat_abort_entry(struct inode *dat, 69 struct nilfs_palloc_req *req) 70 { 71 brelse(req->pr_entry_bh); 72 } 73 74 int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) 75 { 76 int ret; 77 78 ret = nilfs_palloc_prepare_alloc_entry(dat, req, true); 79 if (ret < 0) 80 return ret; 81 82 ret = nilfs_dat_prepare_entry(dat, req, 1); 83 if (ret < 0) 84 nilfs_palloc_abort_alloc_entry(dat, req); 85 86 return ret; 87 } 88 89 void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) 90 { 91 struct nilfs_dat_entry *entry; 92 void *kaddr; 93 94 kaddr = kmap_local_page(req->pr_entry_bh->b_page); 95 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 96 req->pr_entry_bh, kaddr); 97 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 98 entry->de_end = cpu_to_le64(NILFS_CNO_MAX); 99 entry->de_blocknr = cpu_to_le64(0); 100 kunmap_local(kaddr); 101 102 nilfs_palloc_commit_alloc_entry(dat, req); 103 nilfs_dat_commit_entry(dat, req); 104 } 105 106 void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) 107 { 108 nilfs_dat_abort_entry(dat, req); 109 nilfs_palloc_abort_alloc_entry(dat, req); 110 } 111 112 static void nilfs_dat_commit_free(struct inode *dat, 113 struct nilfs_palloc_req *req) 114 { 115 struct nilfs_dat_entry *entry; 116 void *kaddr; 117 118 kaddr = kmap_local_page(req->pr_entry_bh->b_page); 119 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 120 req->pr_entry_bh, kaddr); 121 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 122 entry->de_end = cpu_to_le64(NILFS_CNO_MIN); 123 entry->de_blocknr = cpu_to_le64(0); 124 kunmap_local(kaddr); 125 126 nilfs_dat_commit_entry(dat, req); 127 128 if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { 129 nilfs_error(dat->i_sb, 130 "state inconsistency probably due to duplicate use of vblocknr = %llu", 131 (unsigned long long)req->pr_entry_nr); 132 return; 133 } 134 nilfs_palloc_commit_free_entry(dat, req); 135 } 136 137 int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) 138 { 139 return nilfs_dat_prepare_entry(dat, req, 0); 140 } 141 142 void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, 143 sector_t blocknr) 144 { 145 struct nilfs_dat_entry *entry; 146 void *kaddr; 147 148 kaddr = kmap_local_page(req->pr_entry_bh->b_page); 149 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 150 req->pr_entry_bh, kaddr); 151 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); 152 entry->de_blocknr = cpu_to_le64(blocknr); 153 kunmap_local(kaddr); 154 155 nilfs_dat_commit_entry(dat, req); 156 } 157 158 int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) 159 { 160 struct nilfs_dat_entry *entry; 161 __u64 start; 162 sector_t blocknr; 163 void *kaddr; 164 int ret; 165 166 ret = nilfs_dat_prepare_entry(dat, req, 0); 167 if (ret < 0) 168 return ret; 169 170 kaddr = kmap_local_page(req->pr_entry_bh->b_page); 171 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 172 req->pr_entry_bh, kaddr); 173 start = le64_to_cpu(entry->de_start); 174 blocknr = le64_to_cpu(entry->de_blocknr); 175 kunmap_local(kaddr); 176 177 if (blocknr == 0) { 178 ret = nilfs_palloc_prepare_free_entry(dat, req); 179 if (ret < 0) { 180 nilfs_dat_abort_entry(dat, req); 181 return ret; 182 } 183 } 184 if (unlikely(start > nilfs_mdt_cno(dat))) { 185 nilfs_err(dat->i_sb, 186 "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)", 187 (unsigned long long)req->pr_entry_nr, 188 (unsigned long long)start, 189 (unsigned long long)nilfs_mdt_cno(dat)); 190 nilfs_dat_abort_entry(dat, req); 191 return -EINVAL; 192 } 193 194 return 0; 195 } 196 197 void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, 198 int dead) 199 { 200 struct nilfs_dat_entry *entry; 201 __u64 start, end; 202 sector_t blocknr; 203 void *kaddr; 204 205 kaddr = kmap_local_page(req->pr_entry_bh->b_page); 206 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 207 req->pr_entry_bh, kaddr); 208 end = start = le64_to_cpu(entry->de_start); 209 if (!dead) { 210 end = nilfs_mdt_cno(dat); 211 WARN_ON(start > end); 212 } 213 entry->de_end = cpu_to_le64(end); 214 blocknr = le64_to_cpu(entry->de_blocknr); 215 kunmap_local(kaddr); 216 217 if (blocknr == 0) 218 nilfs_dat_commit_free(dat, req); 219 else 220 nilfs_dat_commit_entry(dat, req); 221 } 222 223 void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) 224 { 225 struct nilfs_dat_entry *entry; 226 __u64 start; 227 sector_t blocknr; 228 void *kaddr; 229 230 kaddr = kmap_local_page(req->pr_entry_bh->b_page); 231 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 232 req->pr_entry_bh, kaddr); 233 start = le64_to_cpu(entry->de_start); 234 blocknr = le64_to_cpu(entry->de_blocknr); 235 kunmap_local(kaddr); 236 237 if (start == nilfs_mdt_cno(dat) && blocknr == 0) 238 nilfs_palloc_abort_free_entry(dat, req); 239 nilfs_dat_abort_entry(dat, req); 240 } 241 242 int nilfs_dat_prepare_update(struct inode *dat, 243 struct nilfs_palloc_req *oldreq, 244 struct nilfs_palloc_req *newreq) 245 { 246 int ret; 247 248 ret = nilfs_dat_prepare_end(dat, oldreq); 249 if (!ret) { 250 ret = nilfs_dat_prepare_alloc(dat, newreq); 251 if (ret < 0) 252 nilfs_dat_abort_end(dat, oldreq); 253 } 254 return ret; 255 } 256 257 void nilfs_dat_commit_update(struct inode *dat, 258 struct nilfs_palloc_req *oldreq, 259 struct nilfs_palloc_req *newreq, int dead) 260 { 261 nilfs_dat_commit_end(dat, oldreq, dead); 262 nilfs_dat_commit_alloc(dat, newreq); 263 } 264 265 void nilfs_dat_abort_update(struct inode *dat, 266 struct nilfs_palloc_req *oldreq, 267 struct nilfs_palloc_req *newreq) 268 { 269 nilfs_dat_abort_end(dat, oldreq); 270 nilfs_dat_abort_alloc(dat, newreq); 271 } 272 273 /** 274 * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified 275 * virtual block address entry as dirty 276 * @dat: DAT file inode 277 * @vblocknr: virtual block number 278 * 279 * Return: 0 on success, or the following negative error code on failure. 280 * * %-EINVAL - Invalid DAT entry (internal code). 281 * * %-EIO - I/O error (including metadata corruption). 282 * * %-ENOMEM - Insufficient memory available. 283 */ 284 int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) 285 { 286 struct nilfs_palloc_req req; 287 int ret; 288 289 req.pr_entry_nr = vblocknr; 290 ret = nilfs_dat_prepare_entry(dat, &req, 0); 291 if (ret == 0) 292 nilfs_dat_commit_entry(dat, &req); 293 return ret; 294 } 295 296 /** 297 * nilfs_dat_freev - free virtual block numbers 298 * @dat: DAT file inode 299 * @vblocknrs: array of virtual block numbers 300 * @nitems: number of virtual block numbers 301 * 302 * Description: nilfs_dat_freev() frees the virtual block numbers specified by 303 * @vblocknrs and @nitems. 304 * 305 * Return Value: On success, 0 is returned. On error, one of the following 306 * negative error codes is returned. 307 * 308 * %-EIO - I/O error. 309 * 310 * %-ENOMEM - Insufficient amount of memory available. 311 * 312 * %-ENOENT - The virtual block number have not been allocated. 313 */ 314 int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) 315 { 316 return nilfs_palloc_freev(dat, vblocknrs, nitems); 317 } 318 319 /** 320 * nilfs_dat_move - change a block number 321 * @dat: DAT file inode 322 * @vblocknr: virtual block number 323 * @blocknr: block number 324 * 325 * Description: nilfs_dat_move() changes the block number associated with 326 * @vblocknr to @blocknr. 327 * 328 * Return Value: On success, 0 is returned. On error, one of the following 329 * negative error codes is returned. 330 * 331 * %-EIO - I/O error. 332 * 333 * %-ENOMEM - Insufficient amount of memory available. 334 */ 335 int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) 336 { 337 struct buffer_head *entry_bh; 338 struct nilfs_dat_entry *entry; 339 void *kaddr; 340 int ret; 341 342 ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); 343 if (ret < 0) 344 return ret; 345 346 /* 347 * The given disk block number (blocknr) is not yet written to 348 * the device at this point. 349 * 350 * To prevent nilfs_dat_translate() from returning the 351 * uncommitted block number, this makes a copy of the entry 352 * buffer and redirects nilfs_dat_translate() to the copy. 353 */ 354 if (!buffer_nilfs_redirected(entry_bh)) { 355 ret = nilfs_mdt_freeze_buffer(dat, entry_bh); 356 if (ret) { 357 brelse(entry_bh); 358 return ret; 359 } 360 } 361 362 kaddr = kmap_local_page(entry_bh->b_page); 363 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 364 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { 365 nilfs_crit(dat->i_sb, 366 "%s: invalid vblocknr = %llu, [%llu, %llu)", 367 __func__, (unsigned long long)vblocknr, 368 (unsigned long long)le64_to_cpu(entry->de_start), 369 (unsigned long long)le64_to_cpu(entry->de_end)); 370 kunmap_local(kaddr); 371 brelse(entry_bh); 372 return -EINVAL; 373 } 374 WARN_ON(blocknr == 0); 375 entry->de_blocknr = cpu_to_le64(blocknr); 376 kunmap_local(kaddr); 377 378 mark_buffer_dirty(entry_bh); 379 nilfs_mdt_mark_dirty(dat); 380 381 brelse(entry_bh); 382 383 return 0; 384 } 385 386 /** 387 * nilfs_dat_translate - translate a virtual block number to a block number 388 * @dat: DAT file inode 389 * @vblocknr: virtual block number 390 * @blocknrp: pointer to a block number 391 * 392 * Description: nilfs_dat_translate() maps the virtual block number @vblocknr 393 * to the corresponding block number. 394 * 395 * Return Value: On success, 0 is returned and the block number associated 396 * with @vblocknr is stored in the place pointed by @blocknrp. On error, one 397 * of the following negative error codes is returned. 398 * 399 * %-EIO - I/O error. 400 * 401 * %-ENOMEM - Insufficient amount of memory available. 402 * 403 * %-ENOENT - A block number associated with @vblocknr does not exist. 404 */ 405 int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) 406 { 407 struct buffer_head *entry_bh, *bh; 408 struct nilfs_dat_entry *entry; 409 sector_t blocknr; 410 void *kaddr; 411 int ret; 412 413 ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); 414 if (ret < 0) 415 return ret; 416 417 if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) { 418 bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh); 419 if (bh) { 420 WARN_ON(!buffer_uptodate(bh)); 421 brelse(entry_bh); 422 entry_bh = bh; 423 } 424 } 425 426 kaddr = kmap_local_page(entry_bh->b_page); 427 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 428 blocknr = le64_to_cpu(entry->de_blocknr); 429 if (blocknr == 0) { 430 ret = -ENOENT; 431 goto out; 432 } 433 *blocknrp = blocknr; 434 435 out: 436 kunmap_local(kaddr); 437 brelse(entry_bh); 438 return ret; 439 } 440 441 ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, 442 size_t nvi) 443 { 444 struct buffer_head *entry_bh; 445 struct nilfs_dat_entry *entry; 446 struct nilfs_vinfo *vinfo = buf; 447 __u64 first, last; 448 void *kaddr; 449 unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; 450 int i, j, n, ret; 451 452 for (i = 0; i < nvi; i += n) { 453 ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr, 454 0, &entry_bh); 455 if (ret < 0) 456 return ret; 457 kaddr = kmap_local_page(entry_bh->b_page); 458 /* last virtual block number in this block */ 459 first = vinfo->vi_vblocknr; 460 first = div64_ul(first, entries_per_block); 461 first *= entries_per_block; 462 last = first + entries_per_block - 1; 463 for (j = i, n = 0; 464 j < nvi && vinfo->vi_vblocknr >= first && 465 vinfo->vi_vblocknr <= last; 466 j++, n++, vinfo = (void *)vinfo + visz) { 467 entry = nilfs_palloc_block_get_entry( 468 dat, vinfo->vi_vblocknr, entry_bh, kaddr); 469 vinfo->vi_start = le64_to_cpu(entry->de_start); 470 vinfo->vi_end = le64_to_cpu(entry->de_end); 471 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); 472 } 473 kunmap_local(kaddr); 474 brelse(entry_bh); 475 } 476 477 return nvi; 478 } 479 480 /** 481 * nilfs_dat_read - read or get dat inode 482 * @sb: super block instance 483 * @entry_size: size of a dat entry 484 * @raw_inode: on-disk dat inode 485 * @inodep: buffer to store the inode 486 */ 487 int nilfs_dat_read(struct super_block *sb, size_t entry_size, 488 struct nilfs_inode *raw_inode, struct inode **inodep) 489 { 490 static struct lock_class_key dat_lock_key; 491 struct inode *dat; 492 struct nilfs_dat_info *di; 493 int err; 494 495 if (entry_size > sb->s_blocksize) { 496 nilfs_err(sb, "too large DAT entry size: %zu bytes", 497 entry_size); 498 return -EINVAL; 499 } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { 500 nilfs_err(sb, "too small DAT entry size: %zu bytes", 501 entry_size); 502 return -EINVAL; 503 } 504 505 dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); 506 if (unlikely(!dat)) 507 return -ENOMEM; 508 if (!(dat->i_state & I_NEW)) 509 goto out; 510 511 err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di)); 512 if (err) 513 goto failed; 514 515 err = nilfs_palloc_init_blockgroup(dat, entry_size); 516 if (err) 517 goto failed; 518 519 di = NILFS_DAT_I(dat); 520 lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); 521 nilfs_palloc_setup_cache(dat, &di->palloc_cache); 522 err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); 523 if (err) 524 goto failed; 525 526 err = nilfs_read_inode_common(dat, raw_inode); 527 if (err) 528 goto failed; 529 530 unlock_new_inode(dat); 531 out: 532 *inodep = dat; 533 return 0; 534 failed: 535 iget_failed(dat); 536 return err; 537 } 538