1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996, 1997, 1998 5 * Sleepycat Software. All rights reserved. 6 */ 7 8 #include "config.h" 9 10 #ifndef lint 11 static const char sccsid[] = "@(#)lock.c 10.61 (Sleepycat) 1/3/99"; 12 #endif /* not lint */ 13 14 #ifndef NO_SYSTEM_INCLUDES 15 #include <sys/types.h> 16 17 #include <errno.h> 18 #include <string.h> 19 #endif 20 21 #include "db_int.h" 22 #include "shqueue.h" 23 #include "db_page.h" 24 #include "db_shash.h" 25 #include "lock.h" 26 #include "db_am.h" 27 #include "txn_auto.h" 28 #include "txn_ext.h" 29 #include "common_ext.h" 30 31 static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int)); 32 static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *)); 33 static int __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, DB_TXN *, 34 u_int32_t, const DBT *, db_lockmode_t, struct __db_lock **)); 35 static int __lock_is_parent __P((u_int32_t, DB_TXN *)); 36 static int __lock_promote __P((DB_LOCKTAB *, DB_LOCKOBJ *)); 37 static int __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int)); 38 static void __lock_remove_waiter 39 __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t)); 40 static int __lock_vec_internal __P((DB_LOCKTAB *, u_int32_t, DB_TXN *, 41 u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **elistp)); 42 43 int 44 lock_id(lt, idp) 45 DB_LOCKTAB *lt; 46 u_int32_t *idp; 47 { 48 u_int32_t id; 49 50 LOCK_PANIC_CHECK(lt); 51 52 LOCK_LOCKREGION(lt); 53 if (lt->region->id >= DB_LOCK_MAXID) 54 lt->region->id = 0; 55 id = ++lt->region->id; 56 UNLOCK_LOCKREGION(lt); 57 58 *idp = id; 59 return (0); 60 } 61 62 int 63 lock_vec(lt, locker, flags, list, nlist, elistp) 64 DB_LOCKTAB *lt; 65 u_int32_t locker, flags; 66 int nlist; 67 DB_LOCKREQ *list, **elistp; 68 { 69 return (__lock_vec_internal(lt, 70 locker, NULL, flags, list, nlist, elistp)); 71 } 72 73 int 74 lock_tvec(lt, txn, flags, list, nlist, elistp) 75 DB_LOCKTAB *lt; 76 DB_TXN *txn; 77 u_int32_t flags; 78 int nlist; 79 DB_LOCKREQ *list, **elistp; 80 { 81 return (__lock_vec_internal(lt, 82 txn->txnid, txn, flags, list, nlist, elistp)); 83 } 84 85 static int 86 __lock_vec_internal(lt, locker, txn, flags, list, nlist, elistp) 87 DB_LOCKTAB *lt; 88 u_int32_t locker; 89 DB_TXN *txn; 90 u_int32_t flags; 91 int nlist; 92 DB_LOCKREQ *list, **elistp; 93 { 94 struct __db_lock *lp; 95 DB_LOCKOBJ *sh_obj, *sh_locker, *sh_parent; 96 int i, ret, run_dd; 97 98 LOCK_PANIC_CHECK(lt); 99 100 /* Validate arguments. */ 101 if ((ret = 102 __db_fchk(lt->dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0) 103 return (ret); 104 105 LOCK_LOCKREGION(lt); 106 107 if ((ret = __lock_validate_region(lt)) != 0) { 108 UNLOCK_LOCKREGION(lt); 109 return (ret); 110 } 111 112 ret = 0; 113 for (i = 0; i < nlist && ret == 0; i++) { 114 switch (list[i].op) { 115 case DB_LOCK_GET: 116 ret = __lock_get_internal(lt, locker, txn, flags, 117 list[i].obj, list[i].mode, &lp); 118 if (ret == 0) { 119 list[i].lock = LOCK_TO_OFFSET(lt, lp); 120 lt->region->nrequests++; 121 } 122 break; 123 case DB_LOCK_INHERIT: 124 /* Find the locker. */ 125 if ((ret = __lock_getobj(lt, locker, 126 NULL, DB_LOCK_LOCKER, &sh_locker)) != 0) 127 break; 128 if (txn == NULL || txn->parent == NULL) { 129 ret = EINVAL; 130 break; 131 } 132 133 if ((ret = __lock_getobj(lt, txn->parent->txnid, 134 NULL, DB_LOCK_LOCKER, &sh_parent)) != 0) 135 break; 136 137 /* 138 * Traverse all the locks held by this locker. Remove 139 * the locks from the locker's list and put them on the 140 * parent's list. 141 */ 142 for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); 143 lp != NULL; 144 lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) { 145 SH_LIST_REMOVE(lp, locker_links, __db_lock); 146 SH_LIST_INSERT_HEAD(&sh_parent->heldby, lp, 147 locker_links, __db_lock); 148 lp->holder = txn->parent->txnid; 149 } 150 __lock_freeobj(lt, sh_locker); 151 lt->region->nlockers--; 152 break; 153 case DB_LOCK_PUT: 154 lp = OFFSET_TO_LOCK(lt, list[i].lock); 155 if (lp->holder != locker) { 156 ret = DB_LOCK_NOTHELD; 157 break; 158 } 159 list[i].mode = lp->mode; 160 161 ret = __lock_put_internal(lt, lp, 0); 162 __lock_checklocker(lt, lp, 0); 163 break; 164 case DB_LOCK_PUT_ALL: 165 /* Find the locker. */ 166 if ((ret = __lock_getobj(lt, locker, 167 NULL, DB_LOCK_LOCKER, &sh_locker)) != 0) 168 break; 169 170 for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); 171 lp != NULL; 172 lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) { 173 if ((ret = __lock_put_internal(lt, lp, 1)) != 0) 174 break; 175 } 176 __lock_freeobj(lt, sh_locker); 177 lt->region->nlockers--; 178 break; 179 case DB_LOCK_PUT_OBJ: 180 181 /* Look up the object in the hash table. */ 182 HASHLOOKUP(lt->hashtab, __db_lockobj, links, 183 list[i].obj, sh_obj, lt->region->table_size, 184 __lock_ohash, __lock_cmp); 185 if (sh_obj == NULL) { 186 ret = EINVAL; 187 break; 188 } 189 /* 190 * Release waiters first, because they won't cause 191 * anyone else to be awakened. If we release the 192 * lockers first, all the waiters get awakened 193 * needlessly. 194 */ 195 for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock); 196 lp != NULL; 197 lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) { 198 lt->region->nreleases += lp->refcount; 199 __lock_remove_waiter(lt, sh_obj, lp, 200 DB_LSTAT_NOGRANT); 201 __lock_checklocker(lt, lp, 1); 202 } 203 204 for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); 205 lp != NULL; 206 lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) { 207 208 lt->region->nreleases += lp->refcount; 209 SH_LIST_REMOVE(lp, locker_links, __db_lock); 210 SH_TAILQ_REMOVE(&sh_obj->holders, lp, links, 211 __db_lock); 212 lp->status = DB_LSTAT_FREE; 213 SH_TAILQ_INSERT_HEAD(<->region->free_locks, 214 lp, links, __db_lock); 215 } 216 217 /* Now free the object. */ 218 __lock_freeobj(lt, sh_obj); 219 break; 220 #ifdef DEBUG 221 case DB_LOCK_DUMP: 222 /* Find the locker. */ 223 if ((ret = __lock_getobj(lt, locker, 224 NULL, DB_LOCK_LOCKER, &sh_locker)) != 0) 225 break; 226 227 for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); 228 lp != NULL; 229 lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) { 230 __lock_printlock(lt, lp, 1); 231 ret = EINVAL; 232 } 233 if (ret == 0) { 234 __lock_freeobj(lt, sh_locker); 235 lt->region->nlockers--; 236 } 237 break; 238 #endif 239 default: 240 ret = EINVAL; 241 break; 242 } 243 } 244 245 if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) { 246 run_dd = 1; 247 lt->region->need_dd = 0; 248 } else 249 run_dd = 0; 250 251 UNLOCK_LOCKREGION(lt); 252 253 if (ret == 0 && run_dd) 254 lock_detect(lt, 0, lt->region->detect); 255 256 if (elistp && ret != 0) 257 *elistp = &list[i - 1]; 258 return (ret); 259 } 260 261 int 262 lock_get(lt, locker, flags, obj, lock_mode, lock) 263 DB_LOCKTAB *lt; 264 u_int32_t locker, flags; 265 const DBT *obj; 266 db_lockmode_t lock_mode; 267 DB_LOCK *lock; 268 { 269 struct __db_lock *lockp; 270 int ret; 271 272 LOCK_PANIC_CHECK(lt); 273 274 /* Validate arguments. */ 275 if ((ret = __db_fchk(lt->dbenv, 276 "lock_get", flags, DB_LOCK_NOWAIT | DB_LOCK_UPGRADE)) != 0) 277 return (ret); 278 279 LOCK_LOCKREGION(lt); 280 281 if ((ret = __lock_validate_region(lt)) == 0) { 282 if (LF_ISSET(DB_LOCK_UPGRADE)) 283 lockp = OFFSET_TO_LOCK(lt, *lock); 284 285 if ((ret = __lock_get_internal(lt, 286 locker, NULL, flags, obj, lock_mode, &lockp)) == 0) { 287 if (!LF_ISSET(DB_LOCK_UPGRADE)) 288 *lock = LOCK_TO_OFFSET(lt, lockp); 289 lt->region->nrequests++; 290 } 291 } 292 293 UNLOCK_LOCKREGION(lt); 294 return (ret); 295 } 296 297 int 298 lock_tget(lt, txn, flags, obj, lock_mode, lock) 299 DB_LOCKTAB *lt; 300 DB_TXN *txn; 301 u_int32_t flags; 302 const DBT *obj; 303 db_lockmode_t lock_mode; 304 DB_LOCK *lock; 305 { 306 struct __db_lock *lockp; 307 int ret; 308 309 LOCK_PANIC_CHECK(lt); 310 311 /* Validate arguments. */ 312 if ((ret = __db_fchk(lt->dbenv, 313 "lock_get", flags, DB_LOCK_NOWAIT | DB_LOCK_UPGRADE)) != 0) 314 return (ret); 315 316 LOCK_LOCKREGION(lt); 317 318 if ((ret = __lock_validate_region(lt)) == 0) { 319 if (LF_ISSET(DB_LOCK_UPGRADE)) 320 lockp = OFFSET_TO_LOCK(lt, *lock); 321 322 if ((ret = __lock_get_internal(lt, 323 txn->txnid, txn, flags, obj, lock_mode, &lockp)) == 0) { 324 if (!LF_ISSET(DB_LOCK_UPGRADE)) 325 *lock = LOCK_TO_OFFSET(lt, lockp); 326 lt->region->nrequests++; 327 } 328 } 329 330 UNLOCK_LOCKREGION(lt); 331 return (ret); 332 } 333 int 334 lock_put(lt, lock) 335 DB_LOCKTAB *lt; 336 DB_LOCK lock; 337 { 338 struct __db_lock *lockp; 339 int ret, run_dd; 340 341 LOCK_PANIC_CHECK(lt); 342 343 LOCK_LOCKREGION(lt); 344 345 if ((ret = __lock_validate_region(lt)) != 0) 346 return (ret); 347 else { 348 lockp = OFFSET_TO_LOCK(lt, lock); 349 ret = __lock_put_internal(lt, lockp, 0); 350 } 351 352 __lock_checklocker(lt, lockp, 0); 353 354 if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) { 355 run_dd = 1; 356 lt->region->need_dd = 0; 357 } else 358 run_dd = 0; 359 360 UNLOCK_LOCKREGION(lt); 361 362 if (ret == 0 && run_dd) 363 lock_detect(lt, 0, lt->region->detect); 364 365 return (ret); 366 } 367 368 static int 369 __lock_put_internal(lt, lockp, do_all) 370 DB_LOCKTAB *lt; 371 struct __db_lock *lockp; 372 int do_all; 373 { 374 DB_LOCKOBJ *sh_obj; 375 int state_changed; 376 377 if (lockp->refcount == 0 || (lockp->status != DB_LSTAT_HELD && 378 lockp->status != DB_LSTAT_WAITING) || lockp->obj == 0) { 379 __db_err(lt->dbenv, "lock_put: invalid lock %lu", 380 (u_long)((u_int8_t *)lockp - (u_int8_t *)lt->region)); 381 return (EINVAL); 382 } 383 384 if (do_all) 385 lt->region->nreleases += lockp->refcount; 386 else 387 lt->region->nreleases++; 388 if (do_all == 0 && lockp->refcount > 1) { 389 lockp->refcount--; 390 return (0); 391 } 392 393 /* Get the object associated with this lock. */ 394 sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj); 395 396 /* Remove lock from locker list. */ 397 SH_LIST_REMOVE(lockp, locker_links, __db_lock); 398 399 /* Remove this lock from its holders/waitlist. */ 400 if (lockp->status != DB_LSTAT_HELD) 401 __lock_remove_waiter(lt, sh_obj, lockp, DB_LSTAT_FREE); 402 else 403 SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock); 404 405 state_changed = __lock_promote(lt, sh_obj); 406 407 /* Check if object should be reclaimed. */ 408 if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) { 409 HASHREMOVE_EL(lt->hashtab, __db_lockobj, 410 links, sh_obj, lt->region->table_size, __lock_lhash); 411 if (sh_obj->lockobj.size > sizeof(sh_obj->objdata)) 412 __db_shalloc_free(lt->mem, 413 SH_DBT_PTR(&sh_obj->lockobj)); 414 SH_TAILQ_INSERT_HEAD(<->region->free_objs, sh_obj, links, 415 __db_lockobj); 416 state_changed = 1; 417 } 418 419 /* Free lock. */ 420 lockp->status = DB_LSTAT_FREE; 421 SH_TAILQ_INSERT_HEAD(<->region->free_locks, lockp, links, __db_lock); 422 423 /* 424 * If we did not promote anyone; we need to run the deadlock 425 * detector again. 426 */ 427 if (state_changed == 0) 428 lt->region->need_dd = 1; 429 430 return (0); 431 } 432 433 static int 434 __lock_get_internal(lt, locker, txn, flags, obj, lock_mode, lockp) 435 DB_LOCKTAB *lt; 436 u_int32_t locker, flags; 437 DB_TXN *txn; 438 const DBT *obj; 439 db_lockmode_t lock_mode; 440 struct __db_lock **lockp; 441 { 442 struct __db_lock *newl, *lp; 443 DB_LOCKOBJ *sh_obj, *sh_locker; 444 DB_LOCKREGION *lrp; 445 size_t newl_off; 446 int ihold, no_dd, ret; 447 448 no_dd = ret = 0; 449 450 /* 451 * Check that lock mode is valid. 452 */ 453 lrp = lt->region; 454 if ((u_int32_t)lock_mode >= lrp->nmodes) { 455 __db_err(lt->dbenv, 456 "lock_get: invalid lock mode %lu\n", (u_long)lock_mode); 457 return (EINVAL); 458 } 459 460 /* Allocate a new lock. Optimize for the common case of a grant. */ 461 if ((newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock)) == NULL) { 462 if ((ret = __lock_grow_region(lt, DB_LOCK_LOCK, 0)) != 0) 463 return (ret); 464 lrp = lt->region; 465 newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock); 466 } 467 newl_off = LOCK_TO_OFFSET(lt, newl); 468 469 /* Optimize for common case of granting a lock. */ 470 SH_TAILQ_REMOVE(&lrp->free_locks, newl, links, __db_lock); 471 472 newl->mode = lock_mode; 473 newl->status = DB_LSTAT_HELD; 474 newl->holder = locker; 475 newl->refcount = 1; 476 477 if ((ret = __lock_getobj(lt, 0, obj, DB_LOCK_OBJTYPE, &sh_obj)) != 0) 478 return (ret); 479 480 lrp = lt->region; /* getobj might have grown */ 481 newl = OFFSET_TO_LOCK(lt, newl_off); 482 483 /* Now make new lock point to object */ 484 newl->obj = SH_PTR_TO_OFF(newl, sh_obj); 485 486 /* 487 * Now we have a lock and an object and we need to see if we should 488 * grant the lock. We use a FIFO ordering so we can only grant a 489 * new lock if it does not conflict with anyone on the holders list 490 * OR anyone on the waiters list. The reason that we don't grant if 491 * there's a conflict is that this can lead to starvation (a writer 492 * waiting on a popularly read item will never be granted). The 493 * downside of this is that a waiting reader can prevent an upgrade 494 * from reader to writer, which is not uncommon. 495 * 496 * There is one exception to the no-conflict rule. If a lock is held 497 * by the requesting locker AND the new lock does not conflict with 498 * any other holders, then we grant the lock. The most common place 499 * this happens is when the holder has a WRITE lock and a READ lock 500 * request comes in for the same locker. If we do not grant the read 501 * lock, then we guarantee deadlock. 502 * 503 * In case of conflict, we put the new lock on the end of the waiters 504 * list, unless we are upgrading in which case the locker goes on the 505 * front of the list. 506 */ 507 ihold = 0; 508 for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); 509 lp != NULL; 510 lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { 511 if (locker == lp->holder || 512 __lock_is_parent(lp->holder, txn)) { 513 if (lp->mode == lock_mode && 514 lp->status == DB_LSTAT_HELD) { 515 if (LF_ISSET(DB_LOCK_UPGRADE)) 516 goto upgrade; 517 518 /* 519 * Lock is held, so we can increment the 520 * reference count and return this lock. 521 */ 522 lp->refcount++; 523 *lockp = lp; 524 SH_TAILQ_INSERT_HEAD(&lrp->free_locks, 525 newl, links, __db_lock); 526 return (0); 527 } else 528 ihold = 1; 529 } else if (CONFLICTS(lt, lp->mode, lock_mode)) 530 break; 531 } 532 533 /* 534 * If we are upgrading, then there are two scenarios. Either 535 * we had no conflicts, so we can do the upgrade. Or, there 536 * is a conflict and we should wait at the HEAD of the waiters 537 * list. 538 */ 539 if (LF_ISSET(DB_LOCK_UPGRADE)) { 540 if (lp == NULL) 541 goto upgrade; 542 543 /* There was a conflict, wait. */ 544 SH_TAILQ_INSERT_HEAD(&sh_obj->waiters, newl, links, __db_lock); 545 goto wait; 546 } 547 548 if (lp == NULL && !ihold) 549 for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock); 550 lp != NULL; 551 lp = SH_TAILQ_NEXT(lp, links, __db_lock)) { 552 if (CONFLICTS(lt, lp->mode, lock_mode) && 553 locker != lp->holder) 554 break; 555 } 556 if (lp == NULL) 557 SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links); 558 else if (!(flags & DB_LOCK_NOWAIT)) 559 SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links); 560 else { 561 /* Free the lock and return an error. */ 562 newl->status = DB_LSTAT_FREE; 563 SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock); 564 return (DB_LOCK_NOTGRANTED); 565 } 566 567 /* 568 * Now, insert the lock onto its locker's list. If the locker does 569 * not currently hold any locks, there's no reason to run a deadlock 570 * detector, save that information. 571 */ 572 if ((ret = 573 __lock_getobj(lt, locker, NULL, DB_LOCK_LOCKER, &sh_locker)) != 0) 574 return (ret); 575 no_dd = SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL; 576 577 lrp = lt->region; 578 SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock); 579 580 if (lp != NULL) { 581 /* 582 * This is really a blocker for the process, so initialize it 583 * set. That way the current process will block when it tries 584 * to get it and the waking process will release it. 585 */ 586 wait: (void)__db_mutex_init(&newl->mutex, 587 MUTEX_LOCK_OFFSET(lt->region, &newl->mutex)); 588 (void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd); 589 590 newl->status = DB_LSTAT_WAITING; 591 lrp->nconflicts++; 592 593 /* 594 * We are about to wait; must release the region mutex. Then, 595 * when we wakeup, we need to reacquire the region mutex before 596 * continuing. 597 */ 598 if (lrp->detect == DB_LOCK_NORUN) 599 lt->region->need_dd = 1; 600 UNLOCK_LOCKREGION(lt); 601 602 /* 603 * We are about to wait; before waiting, see if the deadlock 604 * detector should be run. 605 */ 606 if (lrp->detect != DB_LOCK_NORUN && !no_dd) 607 (void)lock_detect(lt, 0, lrp->detect); 608 609 (void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd); 610 611 LOCK_LOCKREGION(lt); 612 if (newl->status != DB_LSTAT_PENDING) { 613 /* 614 * If this lock errored due to a deadlock, then 615 * we have waiters that require promotion. 616 */ 617 if (newl->status == DB_LSTAT_ABORTED) 618 (void)__lock_promote(lt, sh_obj); 619 /* Return to free list. */ 620 __lock_checklocker(lt, newl, 0); 621 SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, 622 __db_lock); 623 switch (newl->status) { 624 case DB_LSTAT_ABORTED: 625 ret = DB_LOCK_DEADLOCK; 626 break; 627 case DB_LSTAT_NOGRANT: 628 ret = DB_LOCK_NOTGRANTED; 629 break; 630 default: 631 ret = EINVAL; 632 break; 633 } 634 newl->status = DB_LSTAT_FREE; 635 newl = NULL; 636 } else if (LF_ISSET(DB_LOCK_UPGRADE)) { 637 /* 638 * The lock that was just granted got put on the 639 * holders list. Since we're upgrading some other 640 * lock, we've got to remove it here. 641 */ 642 SH_TAILQ_REMOVE(&sh_obj->holders, 643 newl, links, __db_lock); 644 goto upgrade; 645 } else 646 newl->status = DB_LSTAT_HELD; 647 } 648 649 *lockp = newl; 650 return (ret); 651 652 upgrade: 653 /* 654 * This was an upgrade, so return the new lock to the free list and 655 * upgrade the mode. 656 */ 657 (*lockp)->mode = lock_mode; 658 newl->status = DB_LSTAT_FREE; 659 SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock); 660 return (0); 661 } 662 663 /* 664 * __lock_is_locked -- 665 * 666 * PUBLIC: int __lock_is_locked 667 * PUBLIC: __P((DB_LOCKTAB *, u_int32_t, DBT *, db_lockmode_t)); 668 */ 669 int 670 __lock_is_locked(lt, locker, dbt, mode) 671 DB_LOCKTAB *lt; 672 u_int32_t locker; 673 DBT *dbt; 674 db_lockmode_t mode; 675 { 676 struct __db_lock *lp; 677 DB_LOCKOBJ *sh_obj; 678 DB_LOCKREGION *lrp; 679 680 lrp = lt->region; 681 682 /* Look up the object in the hash table. */ 683 HASHLOOKUP(lt->hashtab, __db_lockobj, links, 684 dbt, sh_obj, lrp->table_size, __lock_ohash, __lock_cmp); 685 if (sh_obj == NULL) 686 return (0); 687 688 for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock); 689 lp != NULL; 690 lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) { 691 if (lp->holder == locker && lp->mode == mode) 692 return (1); 693 } 694 695 return (0); 696 } 697 698 /* 699 * __lock_printlock -- 700 * 701 * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int)); 702 */ 703 void 704 __lock_printlock(lt, lp, ispgno) 705 DB_LOCKTAB *lt; 706 struct __db_lock *lp; 707 int ispgno; 708 { 709 DB_LOCKOBJ *lockobj; 710 db_pgno_t pgno; 711 size_t obj; 712 u_int8_t *ptr; 713 const char *mode, *status; 714 715 switch (lp->mode) { 716 case DB_LOCK_IREAD: 717 mode = "IREAD"; 718 break; 719 case DB_LOCK_IWR: 720 mode = "IWR"; 721 break; 722 case DB_LOCK_IWRITE: 723 mode = "IWRITE"; 724 break; 725 case DB_LOCK_NG: 726 mode = "NG"; 727 break; 728 case DB_LOCK_READ: 729 mode = "READ"; 730 break; 731 case DB_LOCK_WRITE: 732 mode = "WRITE"; 733 break; 734 default: 735 mode = "UNKNOWN"; 736 break; 737 } 738 switch (lp->status) { 739 case DB_LSTAT_ABORTED: 740 status = "ABORT"; 741 break; 742 case DB_LSTAT_ERR: 743 status = "ERROR"; 744 break; 745 case DB_LSTAT_FREE: 746 status = "FREE"; 747 break; 748 case DB_LSTAT_HELD: 749 status = "HELD"; 750 break; 751 case DB_LSTAT_NOGRANT: 752 status = "NONE"; 753 break; 754 case DB_LSTAT_WAITING: 755 status = "WAIT"; 756 break; 757 case DB_LSTAT_PENDING: 758 status = "PENDING"; 759 break; 760 default: 761 status = "UNKNOWN"; 762 break; 763 } 764 printf("\t%lx\t%s\t%lu\t%s\t", 765 (u_long)lp->holder, mode, (u_long)lp->refcount, status); 766 767 lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); 768 ptr = SH_DBT_PTR(&lockobj->lockobj); 769 if (ispgno) { 770 /* Assume this is a DBT lock. */ 771 memcpy(&pgno, ptr, sizeof(db_pgno_t)); 772 printf("page %lu\n", (u_long)pgno); 773 } else { 774 obj = (u_int8_t *)lp + lp->obj - (u_int8_t *)lt->region; 775 printf("0x%lx ", (u_long)obj); 776 __db_pr(ptr, lockobj->lockobj.size); 777 printf("\n"); 778 } 779 } 780 781 /* 782 * PUBLIC: int __lock_getobj __P((DB_LOCKTAB *, 783 * PUBLIC: u_int32_t, const DBT *, u_int32_t type, DB_LOCKOBJ **)); 784 */ 785 int 786 __lock_getobj(lt, locker, dbt, type, objp) 787 DB_LOCKTAB *lt; 788 u_int32_t locker, type; 789 const DBT *dbt; 790 DB_LOCKOBJ **objp; 791 { 792 DB_LOCKREGION *lrp; 793 DB_LOCKOBJ *sh_obj; 794 u_int32_t obj_size; 795 int ret; 796 void *p, *src; 797 798 lrp = lt->region; 799 800 /* Look up the object in the hash table. */ 801 if (type == DB_LOCK_OBJTYPE) { 802 HASHLOOKUP(lt->hashtab, __db_lockobj, links, dbt, sh_obj, 803 lrp->table_size, __lock_ohash, __lock_cmp); 804 obj_size = dbt->size; 805 } else { 806 HASHLOOKUP(lt->hashtab, __db_lockobj, links, locker, 807 sh_obj, lrp->table_size, __lock_locker_hash, 808 __lock_locker_cmp); 809 obj_size = sizeof(locker); 810 } 811 812 /* 813 * If we found the object, then we can just return it. If 814 * we didn't find the object, then we need to create it. 815 */ 816 if (sh_obj == NULL) { 817 /* Create new object and then insert it into hash table. */ 818 if ((sh_obj = 819 SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) == NULL) { 820 if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0) 821 return (ret); 822 lrp = lt->region; 823 sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj); 824 } 825 826 /* 827 * If we can fit this object in the structure, do so instead 828 * of shalloc-ing space for it. 829 */ 830 if (obj_size <= sizeof(sh_obj->objdata)) 831 p = sh_obj->objdata; 832 else 833 if ((ret = 834 __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) { 835 if ((ret = __lock_grow_region(lt, 836 DB_LOCK_MEM, obj_size)) != 0) 837 return (ret); 838 lrp = lt->region; 839 /* Reacquire the head of the list. */ 840 sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, 841 __db_lockobj); 842 (void)__db_shalloc(lt->mem, obj_size, 0, &p); 843 } 844 845 src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker; 846 memcpy(p, src, obj_size); 847 848 sh_obj->type = type; 849 SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj); 850 851 SH_TAILQ_INIT(&sh_obj->waiters); 852 if (type == DB_LOCK_LOCKER) 853 SH_LIST_INIT(&sh_obj->heldby); 854 else 855 SH_TAILQ_INIT(&sh_obj->holders); 856 sh_obj->lockobj.size = obj_size; 857 sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p); 858 859 HASHINSERT(lt->hashtab, 860 __db_lockobj, links, sh_obj, lrp->table_size, __lock_lhash); 861 862 if (type == DB_LOCK_LOCKER) 863 lrp->nlockers++; 864 } 865 866 *objp = sh_obj; 867 return (0); 868 } 869 870 /* 871 * Any lock on the waitlist has a process waiting for it. Therefore, we 872 * can't return the lock to the freelist immediately. Instead, we can 873 * remove the lock from the list of waiters, set the status field of the 874 * lock, and then let the process waking up return the lock to the 875 * free list. 876 */ 877 static void 878 __lock_remove_waiter(lt, sh_obj, lockp, status) 879 DB_LOCKTAB *lt; 880 DB_LOCKOBJ *sh_obj; 881 struct __db_lock *lockp; 882 db_status_t status; 883 { 884 SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock); 885 lockp->status = status; 886 887 /* Wake whoever is waiting on this lock. */ 888 (void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd); 889 } 890 891 static void 892 __lock_checklocker(lt, lockp, do_remove) 893 DB_LOCKTAB *lt; 894 struct __db_lock *lockp; 895 int do_remove; 896 { 897 DB_LOCKOBJ *sh_locker; 898 899 if (do_remove) 900 SH_LIST_REMOVE(lockp, locker_links, __db_lock); 901 902 /* if the locker list is NULL, free up the object. */ 903 if (__lock_getobj(lt, lockp->holder, NULL, DB_LOCK_LOCKER, &sh_locker) 904 == 0 && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL) { 905 __lock_freeobj(lt, sh_locker); 906 lt->region->nlockers--; 907 } 908 } 909 910 static void 911 __lock_freeobj(lt, obj) 912 DB_LOCKTAB *lt; 913 DB_LOCKOBJ *obj; 914 { 915 HASHREMOVE_EL(lt->hashtab, 916 __db_lockobj, links, obj, lt->region->table_size, __lock_lhash); 917 if (obj->lockobj.size > sizeof(obj->objdata)) 918 __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj)); 919 SH_TAILQ_INSERT_HEAD(<->region->free_objs, obj, links, __db_lockobj); 920 } 921 922 /* 923 * __lock_downgrade -- 924 * Used by the concurrent access product to downgrade write locks 925 * back to iwrite locks. 926 * 927 * PUBLIC: int __lock_downgrade __P((DB_LOCKTAB *, 928 * PUBLIC: DB_LOCK, db_lockmode_t, u_int32_t)); 929 */ 930 int 931 __lock_downgrade(lt, lock, new_mode, flags) 932 DB_LOCKTAB *lt; 933 DB_LOCK lock; 934 db_lockmode_t new_mode; 935 u_int32_t flags; 936 { 937 struct __db_lock *lockp; 938 DB_LOCKOBJ *obj; 939 int ret; 940 941 COMPQUIET(flags, 0); 942 LOCK_PANIC_CHECK(lt); 943 LOCK_LOCKREGION(lt); 944 945 if ((ret = __lock_validate_region(lt)) == 0) { 946 lockp = OFFSET_TO_LOCK(lt, lock); 947 lockp->mode = new_mode; 948 949 /* Get the object associated with this lock. */ 950 obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj); 951 (void)__lock_promote(lt, obj); 952 ++lt->region->nreleases; 953 } 954 955 UNLOCK_LOCKREGION(lt); 956 957 return (ret); 958 } 959 960 /* 961 * __lock_promote -- 962 * 963 * Look through the waiters and holders lists and decide which (if any) 964 * locks can be promoted. Promote any that are eligible. 965 */ 966 static int 967 __lock_promote(lt, obj) 968 DB_LOCKTAB *lt; 969 DB_LOCKOBJ *obj; 970 { 971 struct __db_lock *lp_w, *lp_h, *next_waiter; 972 int state_changed, waiter_is_txn; 973 974 /* 975 * We need to do lock promotion. We also need to determine if 976 * we're going to need to run the deadlock detector again. If 977 * we release locks, and there are waiters, but no one gets promoted, 978 * then we haven't fundamentally changed the lockmgr state, so 979 * we may still have a deadlock and we have to run again. However, 980 * if there were no waiters, or we actually promoted someone, then 981 * we are OK and we don't have to run it immediately. 982 * 983 * During promotion, we look for state changes so we can return 984 * this information to the caller. 985 */ 986 for (lp_w = SH_TAILQ_FIRST(&obj->waiters, __db_lock), 987 state_changed = lp_w == NULL; 988 lp_w != NULL; 989 lp_w = next_waiter) { 990 waiter_is_txn = TXN_IS_HOLDING(lp_w); 991 next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock); 992 for (lp_h = SH_TAILQ_FIRST(&obj->holders, __db_lock); 993 lp_h != NULL; 994 lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) { 995 if (CONFLICTS(lt, lp_h->mode, lp_w->mode) && 996 lp_h->holder != lp_w->holder && 997 !(waiter_is_txn && 998 TXN_IS_HOLDING(lp_h) && 999 __txn_is_ancestor(lt->dbenv->tx_info, 1000 lp_h->txnoff, lp_w->txnoff))) 1001 break; 1002 } 1003 if (lp_h != NULL) /* Found a conflict. */ 1004 break; 1005 1006 /* No conflict, promote the waiting lock. */ 1007 SH_TAILQ_REMOVE(&obj->waiters, lp_w, links, __db_lock); 1008 lp_w->status = DB_LSTAT_PENDING; 1009 SH_TAILQ_INSERT_TAIL(&obj->holders, lp_w, links); 1010 1011 /* Wake up waiter. */ 1012 (void)__db_mutex_unlock(&lp_w->mutex, lt->reginfo.fd); 1013 state_changed = 1; 1014 } 1015 1016 return (state_changed); 1017 } 1018 1019 static int 1020 __lock_is_parent(locker, txn) 1021 u_int32_t locker; 1022 DB_TXN *txn; 1023 { 1024 DB_TXN *t; 1025 1026 if (txn == NULL) 1027 return (0); 1028 1029 for (t = txn->parent; t != NULL; t = t->parent) 1030 if (t->txnid == locker) 1031 return (1); 1032 1033 return (0); 1034 } 1035