/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"
#include "../dm-bufio.h"

#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>
#include <linux/sched/task.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL.  A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

typedef unsigned long stack_entries[MAX_STACK];

struct block_lock {
	spinlock_t lock;
	__s32 count;
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace traces[MAX_HOLDERS];
	stack_entries entries[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};

static unsigned __find_holder(struct block_lock *lock,
			      struct task_struct *task)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = 0;
	t->max_entries = MAX_STACK;
	t->entries = lock->entries[h];
	t->skip = 2;
	save_stack_trace(t);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, task);
	lock->holders[h] = NULL;
	put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			print_stack_trace(lock->traces + i, 4);

			DMERR("subsequent acquisition attempted here:");
			dump_stack();
#endif
			return -EINVAL;
		}
	}

	return 0;
}

static void __wait(struct waiter *w)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_current_state(TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}
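
/*
 * Note on the hand-off above: __wait() sleeps in TASK_UNINTERRUPTIBLE
 * until w->task is cleared, so the struct waiter can live on the
 * sleeper's stack.  __wake_waiter() removes the entry from the list and
 * saves the task pointer first; the smp_mb() orders those accesses
 * before the w->task = NULL store that releases the sleeper, after
 * which the waiter structure must not be touched again.
 */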

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}

static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}
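
/*
 * The count field above encodes the lock state: -1 means write locked,
 * 0 unlocked, and 1..MAX_HOLDERS the number of read holders.
 * __wake_many() walks the waiter list in order, so once it reaches a
 * writer it either grants the write lock (when no readers remain) or
 * stops, leaving the readers queued behind that writer waiting.
 */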

static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers given priority. We know there's only one mutator in the
	 * system, so ignoring the ordering reversal.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}

static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}

#else /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

#define bl_init(x) do { } while (0)
#define bl_down_read(x) 0
#define bl_down_read_nonblock(x) 0
#define bl_up_read(x) do { } while (0)
#define bl_down_write(x) 0
#define bl_up_write(x) do { } while (0)
#define report_recursive_bug(x, y) do { } while (0)

#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

/*----------------------------------------------------------------*/

/*
 * Block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
	struct dm_block_validator *validator;
	int write_locked;

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
	struct block_lock lock;
#endif
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
			 dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned cache_size,
						 unsigned max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);
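
/*
 * Typical lifecycle of the public interface in this file, as an
 * illustrative sketch only: "bdev" stands for an already opened
 * struct block_device, the 4096/0/16 arguments are arbitrary, a NULL
 * validator is used for brevity, and error handling is abbreviated.
 *
 *	struct dm_block_manager *bm;
 *	struct dm_block *b;
 *	int r;
 *
 *	bm = dm_block_manager_create(bdev, 4096, 0, 16);
 *	if (IS_ERR(bm))
 *		return PTR_ERR(bm);
 *
 *	r = dm_bm_read_lock(bm, 0, NULL, &b);
 *	if (!r) {
 *		examine dm_block_data(b) here
 *		dm_bm_unlock(b);
 *	}
 *
 *	dm_block_manager_destroy(bm);
 */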

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}

static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;
		if (!v)
			return 0;
		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);
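
/*
 * A validator's check() is called from dm_bm_validate_buffer() the first
 * time a block is read under it; prepare_for_write() is called from
 * dm_block_manager_write_callback() before a dirty block is written out.
 * A sketch of one, assuming the callback signatures declared in
 * dm-block-manager.h; the on-disk layout and all the example_* names are
 * made up for illustration only:
 *
 *	struct example_header {
 *		__le32 csum;
 *		__le32 flags;
 *		__le64 blocknr;
 *	} __packed;
 *
 *	static void example_prepare_for_write(struct dm_block_validator *v,
 *					      struct dm_block *b,
 *					      size_t block_size)
 *	{
 *		struct example_header *h = dm_block_data(b);
 *
 *		h->blocknr = cpu_to_le64(dm_block_location(b));
 *		h->csum = cpu_to_le32(dm_bm_checksum(&h->flags,
 *						     block_size - sizeof(__le32),
 *						     0));
 *	}
 *
 *	static int example_check(struct dm_block_validator *v,
 *				 struct dm_block *b, size_t block_size)
 *	{
 *		struct example_header *h = dm_block_data(b);
 *		__le32 csum = cpu_to_le32(dm_bm_checksum(&h->flags,
 *							 block_size - sizeof(__le32),
 *							 0));
 *
 *		if (dm_block_location(b) != le64_to_cpu(h->blocknr))
 *			return -ENOTBLK;
 *		if (csum != h->csum)
 *			return -EILSEQ;
 *		return 0;
 *	}
 *
 *	static struct dm_block_validator example_validator = {
 *		.name = "example",
 *		.prepare_for_write = example_prepare_for_write,
 *		.check = example_check,
 *	};
 */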

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);

void dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux;
	aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

int dm_bm_flush(struct dm_block_manager *bm)
{
	if (bm->read_only)
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return bm->read_only;
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/
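
/*
 * For reference, the usual write cycle through the interface above, as
 * an illustrative sketch only ("bm" is an existing block manager, block
 * 1 is an arbitrary location, and the NULL validator is for brevity):
 *
 *	struct dm_block *b;
 *	__le64 *data;
 *	int r;
 *
 *	r = dm_bm_write_lock_zero(bm, 1, NULL, &b);
 *	if (r)
 *		return r;	(-EPERM if the manager is read only)
 *
 *	data = dm_block_data(b);
 *	data[0] = cpu_to_le64(0x1234);
 *
 *	dm_bm_unlock(b);	(write-locked blocks are marked dirty here)
 *	return dm_bm_flush(bm);	(writes the dirty buffers to disk)
 */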