1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2011 Red Hat, Inc. 4 * 5 * This file is released under the GPL. 6 */ 7 #include "dm-block-manager.h" 8 #include "dm-persistent-data-internal.h" 9 10 #include <linux/dm-bufio.h> 11 #include <linux/crc32c.h> 12 #include <linux/module.h> 13 #include <linux/slab.h> 14 #include <linux/rwsem.h> 15 #include <linux/device-mapper.h> 16 #include <linux/stacktrace.h> 17 #include <linux/sched/task.h> 18 19 #define DM_MSG_PREFIX "block manager" 20 21 /*----------------------------------------------------------------*/ 22 23 #ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING 24 25 /* 26 * This is a read/write semaphore with a couple of differences. 27 * 28 * i) There is a restriction on the number of concurrent read locks that 29 * may be held at once. This is just an implementation detail. 30 * 31 * ii) Recursive locking attempts are detected and return EINVAL. A stack 32 * trace is also emitted for the previous lock acquisition. 33 * 34 * iii) Priority is given to write locks. 35 */ 36 #define MAX_HOLDERS 4 37 #define MAX_STACK 10 38 39 struct stack_store { 40 unsigned int nr_entries; 41 unsigned long entries[MAX_STACK]; 42 }; 43 44 struct block_lock { 45 spinlock_t lock; 46 __s32 count; 47 struct list_head waiters; 48 struct task_struct *holders[MAX_HOLDERS]; 49 50 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 51 struct stack_store traces[MAX_HOLDERS]; 52 #endif 53 }; 54 55 struct waiter { 56 struct list_head list; 57 struct task_struct *task; 58 int wants_write; 59 }; 60 61 static unsigned int __find_holder(struct block_lock *lock, 62 struct task_struct *task) 63 { 64 unsigned int i; 65 66 for (i = 0; i < MAX_HOLDERS; i++) 67 if (lock->holders[i] == task) 68 break; 69 70 BUG_ON(i == MAX_HOLDERS); 71 return i; 72 } 73 74 /* call this *after* you increment lock->count */ 75 static void __add_holder(struct block_lock *lock, struct task_struct *task) 76 { 77 unsigned int h = __find_holder(lock, NULL); 78 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 79 struct stack_store *t; 80 #endif 81 82 get_task_struct(task); 83 lock->holders[h] = task; 84 85 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 86 t = lock->traces + h; 87 t->nr_entries = stack_trace_save(t->entries, MAX_STACK, 2); 88 #endif 89 } 90 91 /* call this *before* you decrement lock->count */ 92 static void __del_holder(struct block_lock *lock, struct task_struct *task) 93 { 94 unsigned int h = __find_holder(lock, task); 95 96 lock->holders[h] = NULL; 97 put_task_struct(task); 98 } 99 100 static int __check_holder(struct block_lock *lock) 101 { 102 unsigned int i; 103 104 for (i = 0; i < MAX_HOLDERS; i++) { 105 if (lock->holders[i] == current) { 106 DMERR("recursive lock detected in metadata"); 107 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 108 DMERR("previously held here:"); 109 stack_trace_print(lock->traces[i].entries, 110 lock->traces[i].nr_entries, 4); 111 112 DMERR("subsequent acquisition attempted here:"); 113 dump_stack(); 114 #endif 115 return -EINVAL; 116 } 117 } 118 119 return 0; 120 } 121 122 static void __wait(struct waiter *w) 123 { 124 for (;;) { 125 set_current_state(TASK_UNINTERRUPTIBLE); 126 127 if (!w->task) 128 break; 129 130 schedule(); 131 } 132 133 set_current_state(TASK_RUNNING); 134 } 135 136 static void __wake_waiter(struct waiter *w) 137 { 138 struct task_struct *task; 139 140 list_del(&w->list); 141 task = w->task; 142 smp_mb(); 143 w->task = NULL; 144 wake_up_process(task); 145 } 146 147 /* 148 * We either wake a few readers or a single writer. 149 */ 150 static void __wake_many(struct block_lock *lock) 151 { 152 struct waiter *w, *tmp; 153 154 BUG_ON(lock->count < 0); 155 list_for_each_entry_safe(w, tmp, &lock->waiters, list) { 156 if (lock->count >= MAX_HOLDERS) 157 return; 158 159 if (w->wants_write) { 160 if (lock->count > 0) 161 return; /* still read locked */ 162 163 lock->count = -1; 164 __add_holder(lock, w->task); 165 __wake_waiter(w); 166 return; 167 } 168 169 lock->count++; 170 __add_holder(lock, w->task); 171 __wake_waiter(w); 172 } 173 } 174 175 static void bl_init(struct block_lock *lock) 176 { 177 int i; 178 179 spin_lock_init(&lock->lock); 180 lock->count = 0; 181 INIT_LIST_HEAD(&lock->waiters); 182 for (i = 0; i < MAX_HOLDERS; i++) 183 lock->holders[i] = NULL; 184 } 185 186 static int __available_for_read(struct block_lock *lock) 187 { 188 return lock->count >= 0 && 189 lock->count < MAX_HOLDERS && 190 list_empty(&lock->waiters); 191 } 192 193 static int bl_down_read(struct block_lock *lock) 194 { 195 int r; 196 struct waiter w; 197 198 spin_lock(&lock->lock); 199 r = __check_holder(lock); 200 if (r) { 201 spin_unlock(&lock->lock); 202 return r; 203 } 204 205 if (__available_for_read(lock)) { 206 lock->count++; 207 __add_holder(lock, current); 208 spin_unlock(&lock->lock); 209 return 0; 210 } 211 212 get_task_struct(current); 213 214 w.task = current; 215 w.wants_write = 0; 216 list_add_tail(&w.list, &lock->waiters); 217 spin_unlock(&lock->lock); 218 219 __wait(&w); 220 put_task_struct(current); 221 return 0; 222 } 223 224 static int bl_down_read_nonblock(struct block_lock *lock) 225 { 226 int r; 227 228 spin_lock(&lock->lock); 229 r = __check_holder(lock); 230 if (r) 231 goto out; 232 233 if (__available_for_read(lock)) { 234 lock->count++; 235 __add_holder(lock, current); 236 r = 0; 237 } else 238 r = -EWOULDBLOCK; 239 240 out: 241 spin_unlock(&lock->lock); 242 return r; 243 } 244 245 static void bl_up_read(struct block_lock *lock) 246 { 247 spin_lock(&lock->lock); 248 BUG_ON(lock->count <= 0); 249 __del_holder(lock, current); 250 --lock->count; 251 if (!list_empty(&lock->waiters)) 252 __wake_many(lock); 253 spin_unlock(&lock->lock); 254 } 255 256 static int bl_down_write(struct block_lock *lock) 257 { 258 int r; 259 struct waiter w; 260 261 spin_lock(&lock->lock); 262 r = __check_holder(lock); 263 if (r) { 264 spin_unlock(&lock->lock); 265 return r; 266 } 267 268 if (lock->count == 0 && list_empty(&lock->waiters)) { 269 lock->count = -1; 270 __add_holder(lock, current); 271 spin_unlock(&lock->lock); 272 return 0; 273 } 274 275 get_task_struct(current); 276 w.task = current; 277 w.wants_write = 1; 278 279 /* 280 * Writers given priority. We know there's only one mutator in the 281 * system, so ignoring the ordering reversal. 282 */ 283 list_add(&w.list, &lock->waiters); 284 spin_unlock(&lock->lock); 285 286 __wait(&w); 287 put_task_struct(current); 288 289 return 0; 290 } 291 292 static void bl_up_write(struct block_lock *lock) 293 { 294 spin_lock(&lock->lock); 295 __del_holder(lock, current); 296 lock->count = 0; 297 if (!list_empty(&lock->waiters)) 298 __wake_many(lock); 299 spin_unlock(&lock->lock); 300 } 301 302 static void report_recursive_bug(dm_block_t b, int r) 303 { 304 if (r == -EINVAL) 305 DMERR("recursive acquisition of block %llu requested.", 306 (unsigned long long) b); 307 } 308 309 #else /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ 310 311 #define bl_init(x) do { } while (0) 312 #define bl_down_read(x) 0 313 #define bl_down_read_nonblock(x) 0 314 #define bl_up_read(x) do { } while (0) 315 #define bl_down_write(x) 0 316 #define bl_up_write(x) do { } while (0) 317 #define report_recursive_bug(x, y) do { } while (0) 318 319 #endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ 320 321 /*----------------------------------------------------------------*/ 322 323 /* 324 * Block manager is currently implemented using dm-bufio. struct 325 * dm_block_manager and struct dm_block map directly onto a couple of 326 * structs in the bufio interface. I want to retain the freedom to move 327 * away from bufio in the future. So these structs are just cast within 328 * this .c file, rather than making it through to the public interface. 329 */ 330 static struct dm_buffer *to_buffer(struct dm_block *b) 331 { 332 return (struct dm_buffer *) b; 333 } 334 335 dm_block_t dm_block_location(struct dm_block *b) 336 { 337 return dm_bufio_get_block_number(to_buffer(b)); 338 } 339 EXPORT_SYMBOL_GPL(dm_block_location); 340 341 void *dm_block_data(struct dm_block *b) 342 { 343 return dm_bufio_get_block_data(to_buffer(b)); 344 } 345 EXPORT_SYMBOL_GPL(dm_block_data); 346 347 struct buffer_aux { 348 struct dm_block_validator *validator; 349 int write_locked; 350 351 #ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING 352 struct block_lock lock; 353 #endif 354 }; 355 356 static void dm_block_manager_alloc_callback(struct dm_buffer *buf) 357 { 358 struct buffer_aux *aux = dm_bufio_get_aux_data(buf); 359 360 aux->validator = NULL; 361 bl_init(&aux->lock); 362 } 363 364 static void dm_block_manager_write_callback(struct dm_buffer *buf) 365 { 366 struct buffer_aux *aux = dm_bufio_get_aux_data(buf); 367 368 if (aux->validator) { 369 aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf, 370 dm_bufio_get_block_size(dm_bufio_get_client(buf))); 371 } 372 } 373 374 /* 375 * ------------------------------------------------------------- 376 * Public interface 377 *-------------------------------------------------------------- 378 */ 379 struct dm_block_manager { 380 struct dm_bufio_client *bufio; 381 bool read_only:1; 382 }; 383 384 struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, 385 unsigned int block_size, 386 unsigned int max_held_per_thread) 387 { 388 int r; 389 struct dm_block_manager *bm; 390 391 bm = kmalloc(sizeof(*bm), GFP_KERNEL); 392 if (!bm) { 393 r = -ENOMEM; 394 goto bad; 395 } 396 397 bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread, 398 sizeof(struct buffer_aux), 399 dm_block_manager_alloc_callback, 400 dm_block_manager_write_callback, 401 0); 402 if (IS_ERR(bm->bufio)) { 403 r = PTR_ERR(bm->bufio); 404 kfree(bm); 405 goto bad; 406 } 407 408 bm->read_only = false; 409 410 return bm; 411 412 bad: 413 return ERR_PTR(r); 414 } 415 EXPORT_SYMBOL_GPL(dm_block_manager_create); 416 417 void dm_block_manager_destroy(struct dm_block_manager *bm) 418 { 419 dm_bufio_client_destroy(bm->bufio); 420 kfree(bm); 421 } 422 EXPORT_SYMBOL_GPL(dm_block_manager_destroy); 423 424 void dm_block_manager_reset(struct dm_block_manager *bm) 425 { 426 dm_bufio_client_reset(bm->bufio); 427 } 428 EXPORT_SYMBOL_GPL(dm_block_manager_reset); 429 430 unsigned int dm_bm_block_size(struct dm_block_manager *bm) 431 { 432 return dm_bufio_get_block_size(bm->bufio); 433 } 434 EXPORT_SYMBOL_GPL(dm_bm_block_size); 435 436 dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) 437 { 438 return dm_bufio_get_device_size(bm->bufio); 439 } 440 441 static int dm_bm_validate_buffer(struct dm_block_manager *bm, 442 struct dm_buffer *buf, 443 struct buffer_aux *aux, 444 struct dm_block_validator *v) 445 { 446 if (unlikely(!aux->validator)) { 447 int r; 448 449 if (!v) 450 return 0; 451 r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); 452 if (unlikely(r)) { 453 DMERR_LIMIT("%s validator check failed for block %llu", v->name, 454 (unsigned long long) dm_bufio_get_block_number(buf)); 455 return r; 456 } 457 aux->validator = v; 458 } else { 459 if (unlikely(aux->validator != v)) { 460 DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu", 461 aux->validator->name, v ? v->name : "NULL", 462 (unsigned long long) dm_bufio_get_block_number(buf)); 463 return -EINVAL; 464 } 465 } 466 467 return 0; 468 } 469 int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, 470 struct dm_block_validator *v, 471 struct dm_block **result) 472 { 473 struct buffer_aux *aux; 474 void *p; 475 int r; 476 477 p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); 478 if (IS_ERR(p)) 479 return PTR_ERR(p); 480 481 aux = dm_bufio_get_aux_data(to_buffer(*result)); 482 r = bl_down_read(&aux->lock); 483 if (unlikely(r)) { 484 dm_bufio_release(to_buffer(*result)); 485 report_recursive_bug(b, r); 486 return r; 487 } 488 489 aux->write_locked = 0; 490 491 r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); 492 if (unlikely(r)) { 493 bl_up_read(&aux->lock); 494 dm_bufio_release(to_buffer(*result)); 495 return r; 496 } 497 498 return 0; 499 } 500 EXPORT_SYMBOL_GPL(dm_bm_read_lock); 501 502 int dm_bm_write_lock(struct dm_block_manager *bm, 503 dm_block_t b, struct dm_block_validator *v, 504 struct dm_block **result) 505 { 506 struct buffer_aux *aux; 507 void *p; 508 int r; 509 510 if (dm_bm_is_read_only(bm)) 511 return -EPERM; 512 513 p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); 514 if (IS_ERR(p)) 515 return PTR_ERR(p); 516 517 aux = dm_bufio_get_aux_data(to_buffer(*result)); 518 r = bl_down_write(&aux->lock); 519 if (r) { 520 dm_bufio_release(to_buffer(*result)); 521 report_recursive_bug(b, r); 522 return r; 523 } 524 525 aux->write_locked = 1; 526 527 r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); 528 if (unlikely(r)) { 529 bl_up_write(&aux->lock); 530 dm_bufio_release(to_buffer(*result)); 531 return r; 532 } 533 534 return 0; 535 } 536 EXPORT_SYMBOL_GPL(dm_bm_write_lock); 537 538 int dm_bm_read_try_lock(struct dm_block_manager *bm, 539 dm_block_t b, struct dm_block_validator *v, 540 struct dm_block **result) 541 { 542 struct buffer_aux *aux; 543 void *p; 544 int r; 545 546 p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); 547 if (IS_ERR(p)) 548 return PTR_ERR(p); 549 if (unlikely(!p)) 550 return -EWOULDBLOCK; 551 552 aux = dm_bufio_get_aux_data(to_buffer(*result)); 553 r = bl_down_read_nonblock(&aux->lock); 554 if (r < 0) { 555 dm_bufio_release(to_buffer(*result)); 556 report_recursive_bug(b, r); 557 return r; 558 } 559 aux->write_locked = 0; 560 561 r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); 562 if (unlikely(r)) { 563 bl_up_read(&aux->lock); 564 dm_bufio_release(to_buffer(*result)); 565 return r; 566 } 567 568 return 0; 569 } 570 571 int dm_bm_write_lock_zero(struct dm_block_manager *bm, 572 dm_block_t b, struct dm_block_validator *v, 573 struct dm_block **result) 574 { 575 int r; 576 struct buffer_aux *aux; 577 void *p; 578 579 if (dm_bm_is_read_only(bm)) 580 return -EPERM; 581 582 p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); 583 if (IS_ERR(p)) 584 return PTR_ERR(p); 585 586 memset(p, 0, dm_bm_block_size(bm)); 587 588 aux = dm_bufio_get_aux_data(to_buffer(*result)); 589 r = bl_down_write(&aux->lock); 590 if (r) { 591 dm_bufio_release(to_buffer(*result)); 592 return r; 593 } 594 595 aux->write_locked = 1; 596 aux->validator = v; 597 598 return 0; 599 } 600 EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero); 601 602 void dm_bm_unlock(struct dm_block *b) 603 { 604 struct buffer_aux *aux = dm_bufio_get_aux_data(to_buffer(b)); 605 606 if (aux->write_locked) { 607 dm_bufio_mark_buffer_dirty(to_buffer(b)); 608 bl_up_write(&aux->lock); 609 } else 610 bl_up_read(&aux->lock); 611 612 dm_bufio_release(to_buffer(b)); 613 } 614 EXPORT_SYMBOL_GPL(dm_bm_unlock); 615 616 int dm_bm_flush(struct dm_block_manager *bm) 617 { 618 if (dm_bm_is_read_only(bm)) 619 return -EPERM; 620 621 return dm_bufio_write_dirty_buffers(bm->bufio); 622 } 623 EXPORT_SYMBOL_GPL(dm_bm_flush); 624 625 void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b) 626 { 627 dm_bufio_prefetch(bm->bufio, b, 1); 628 } 629 630 bool dm_bm_is_read_only(struct dm_block_manager *bm) 631 { 632 return bm ? bm->read_only : true; 633 } 634 EXPORT_SYMBOL_GPL(dm_bm_is_read_only); 635 636 void dm_bm_set_read_only(struct dm_block_manager *bm) 637 { 638 if (bm) 639 bm->read_only = true; 640 } 641 EXPORT_SYMBOL_GPL(dm_bm_set_read_only); 642 643 void dm_bm_set_read_write(struct dm_block_manager *bm) 644 { 645 if (bm) 646 bm->read_only = false; 647 } 648 EXPORT_SYMBOL_GPL(dm_bm_set_read_write); 649 650 u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) 651 { 652 return crc32c(~(u32) 0, data, len) ^ init_xor; 653 } 654 EXPORT_SYMBOL_GPL(dm_bm_checksum); 655 656 /*----------------------------------------------------------------*/ 657 658 MODULE_LICENSE("GPL"); 659 MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>"); 660 MODULE_DESCRIPTION("Immutable metadata library for dm"); 661 662 /*----------------------------------------------------------------*/ 663