/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 */

#include "config.h"

#ifndef lint
static const char sccsid[] = "@(#)lock.c	10.61 (Sleepycat) 1/3/99";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <errno.h>
#include <string.h>
#endif

#include "db_int.h"
#include "shqueue.h"
#include "db_page.h"
#include "db_shash.h"
#include "lock.h"
#include "db_am.h"
#include "txn_auto.h"
#include "txn_ext.h"
#include "common_ext.h"

static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int));
static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *));
static int  __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, DB_TXN *,
    u_int32_t, const DBT *, db_lockmode_t, struct __db_lock **));
static int  __lock_is_parent __P((u_int32_t, DB_TXN *));
static int  __lock_promote __P((DB_LOCKTAB *, DB_LOCKOBJ *));
static int  __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int));
static void __lock_remove_waiter
    __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t));
static int  __lock_vec_internal __P((DB_LOCKTAB *, u_int32_t, DB_TXN *,
    u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **elistp));

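/*
 * lock_id --
 *	Generate a unique locker ID, wrapping the ID space when it reaches
 *	DB_LOCK_MAXID.
 */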
int
lock_id(lt, idp)
	DB_LOCKTAB *lt;
	u_int32_t *idp;
{
	u_int32_t id;

	LOCK_PANIC_CHECK(lt);

	LOCK_LOCKREGION(lt);
	if (lt->region->id >= DB_LOCK_MAXID)
		lt->region->id = 0;
	id = ++lt->region->id;
	UNLOCK_LOCKREGION(lt);

	*idp = id;
	return (0);
}

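/*
 * lock_vec --
 *	Vector interface: perform each operation in the request list, in
 *	order, on behalf of the given locker ID.
 */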
int
lock_vec(lt, locker, flags, list, nlist, elistp)
	DB_LOCKTAB *lt;
	u_int32_t locker, flags;
	int nlist;
	DB_LOCKREQ *list, **elistp;
{
	return (__lock_vec_internal(lt,
	    locker, NULL, flags, list, nlist, elistp));
}

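/*
 * lock_tvec --
 *	Transactional variant of lock_vec; the locker ID is taken from the
 *	transaction.
 */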
int
lock_tvec(lt, txn, flags, list, nlist, elistp)
	DB_LOCKTAB *lt;
	DB_TXN *txn;
	u_int32_t flags;
	int nlist;
	DB_LOCKREQ *list, **elistp;
{
	return (__lock_vec_internal(lt,
	    txn->txnid, txn, flags, list, nlist, elistp));
}

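/*
 * __lock_vec_internal --
 *	Does the real work of lock_vec and lock_tvec: validates arguments,
 *	then processes each request under the region lock, stopping at the
 *	first failure and returning the failing request through elistp.
 */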
static int
__lock_vec_internal(lt, locker, txn, flags, list, nlist, elistp)
	DB_LOCKTAB *lt;
	u_int32_t locker;
	DB_TXN *txn;
	u_int32_t flags;
	int nlist;
	DB_LOCKREQ *list, **elistp;
{
	struct __db_lock *lp;
	DB_LOCKOBJ *sh_obj, *sh_locker, *sh_parent;
	int i, ret, run_dd;

	LOCK_PANIC_CHECK(lt);

	/* Validate arguments. */
	if ((ret =
	    __db_fchk(lt->dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0)
		return (ret);

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) != 0) {
		UNLOCK_LOCKREGION(lt);
		return (ret);
	}

	ret = 0;
	for (i = 0; i < nlist && ret == 0; i++) {
		switch (list[i].op) {
		case DB_LOCK_GET:
			ret = __lock_get_internal(lt, locker, txn, flags,
			    list[i].obj, list[i].mode, &lp);
			if (ret == 0) {
				list[i].lock = LOCK_TO_OFFSET(lt, lp);
				lt->region->nrequests++;
			}
			break;
		case DB_LOCK_INHERIT:
			/* Find the locker. */
			if ((ret = __lock_getobj(lt, locker,
			    NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
				break;
			if (txn == NULL || txn->parent == NULL) {
				ret = EINVAL;
				break;
			}

			if ((ret = __lock_getobj(lt, txn->parent->txnid,
			    NULL, DB_LOCK_LOCKER, &sh_parent)) != 0)
				break;

			/*
			 * Traverse all the locks held by this locker.  Remove
			 * the locks from the locker's list and put them on the
			 * parent's list.
			 */
			for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
			    lp != NULL;
			    lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
				SH_LIST_REMOVE(lp, locker_links, __db_lock);
				SH_LIST_INSERT_HEAD(&sh_parent->heldby, lp,
				    locker_links, __db_lock);
				lp->holder = txn->parent->txnid;
			}
			__lock_freeobj(lt, sh_locker);
			lt->region->nlockers--;
			break;
		case DB_LOCK_PUT:
			lp = OFFSET_TO_LOCK(lt, list[i].lock);
			if (lp->holder != locker) {
				ret = DB_LOCK_NOTHELD;
				break;
			}
			list[i].mode = lp->mode;

			ret = __lock_put_internal(lt, lp, 0);
			__lock_checklocker(lt, lp, 0);
			break;
		case DB_LOCK_PUT_ALL:
			/* Find the locker. */
			if ((ret = __lock_getobj(lt, locker,
			    NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
				break;

			for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
			    lp != NULL;
			    lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
				if ((ret = __lock_put_internal(lt, lp, 1)) != 0)
					break;
			}
			__lock_freeobj(lt, sh_locker);
			lt->region->nlockers--;
			break;
		case DB_LOCK_PUT_OBJ:
			/* Look up the object in the hash table. */
			HASHLOOKUP(lt->hashtab, __db_lockobj, links,
			    list[i].obj, sh_obj, lt->region->table_size,
			    __lock_ohash, __lock_cmp);
			if (sh_obj == NULL) {
				ret = EINVAL;
				break;
			}
			/*
			 * Release waiters first, because they won't cause
			 * anyone else to be awakened.  If we release the
			 * lockers first, all the waiters get awakened
			 * needlessly.
			 */
			for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
			    lp != NULL;
			    lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) {
				lt->region->nreleases += lp->refcount;
				__lock_remove_waiter(lt, sh_obj, lp,
				    DB_LSTAT_NOGRANT);
				__lock_checklocker(lt, lp, 1);
			}

			for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
			    lp != NULL;
			    lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) {
				lt->region->nreleases += lp->refcount;
				SH_LIST_REMOVE(lp, locker_links, __db_lock);
				SH_TAILQ_REMOVE(&sh_obj->holders, lp, links,
				    __db_lock);
				lp->status = DB_LSTAT_FREE;
				SH_TAILQ_INSERT_HEAD(&lt->region->free_locks,
				    lp, links, __db_lock);
			}

			/* Now free the object. */
			__lock_freeobj(lt, sh_obj);
			break;
#ifdef DEBUG
		case DB_LOCK_DUMP:
			/* Find the locker. */
			if ((ret = __lock_getobj(lt, locker,
			    NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
				break;

			for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
			    lp != NULL;
			    lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) {
				__lock_printlock(lt, lp, 1);
				ret = EINVAL;
			}
			if (ret == 0) {
				__lock_freeobj(lt, sh_locker);
				lt->region->nlockers--;
			}
			break;
#endif
		default:
			ret = EINVAL;
			break;
		}
	}

	if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
		run_dd = 1;
		lt->region->need_dd = 0;
	} else
		run_dd = 0;

	UNLOCK_LOCKREGION(lt);

	if (ret == 0 && run_dd)
		lock_detect(lt, 0, lt->region->detect);

	if (elistp && ret != 0)
		*elistp = &list[i - 1];
	return (ret);
}

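/*
 * lock_get --
 *	Acquire (or, with DB_LOCK_UPGRADE, upgrade) a single lock on behalf
 *	of a locker ID, returning the lock's region offset through "lock".
 */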
int
lock_get(lt, locker, flags, obj, lock_mode, lock)
	DB_LOCKTAB *lt;
	u_int32_t locker, flags;
	const DBT *obj;
	db_lockmode_t lock_mode;
	DB_LOCK *lock;
{
	struct __db_lock *lockp;
	int ret;

	LOCK_PANIC_CHECK(lt);

	/* Validate arguments. */
	if ((ret = __db_fchk(lt->dbenv,
	    "lock_get", flags, DB_LOCK_NOWAIT | DB_LOCK_UPGRADE)) != 0)
		return (ret);

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) == 0) {
		if (LF_ISSET(DB_LOCK_UPGRADE))
			lockp = OFFSET_TO_LOCK(lt, *lock);

		if ((ret = __lock_get_internal(lt,
		    locker, NULL, flags, obj, lock_mode, &lockp)) == 0) {
			if (!LF_ISSET(DB_LOCK_UPGRADE))
				*lock = LOCK_TO_OFFSET(lt, lockp);
			lt->region->nrequests++;
		}
	}

	UNLOCK_LOCKREGION(lt);
	return (ret);
}

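/*
 * lock_tget --
 *	Transactional variant of lock_get; the locker ID is taken from the
 *	transaction, which also enables parent/child lock sharing.
 */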
int
lock_tget(lt, txn, flags, obj, lock_mode, lock)
	DB_LOCKTAB *lt;
	DB_TXN *txn;
	u_int32_t flags;
	const DBT *obj;
	db_lockmode_t lock_mode;
	DB_LOCK *lock;
{
	struct __db_lock *lockp;
	int ret;

	LOCK_PANIC_CHECK(lt);

	/* Validate arguments. */
	if ((ret = __db_fchk(lt->dbenv,
	    "lock_tget", flags, DB_LOCK_NOWAIT | DB_LOCK_UPGRADE)) != 0)
		return (ret);

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) == 0) {
		if (LF_ISSET(DB_LOCK_UPGRADE))
			lockp = OFFSET_TO_LOCK(lt, *lock);

		if ((ret = __lock_get_internal(lt,
		    txn->txnid, txn, flags, obj, lock_mode, &lockp)) == 0) {
			if (!LF_ISSET(DB_LOCK_UPGRADE))
				*lock = LOCK_TO_OFFSET(lt, lockp);
			lt->region->nrequests++;
		}
	}

	UNLOCK_LOCKREGION(lt);
	return (ret);
}
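
/*
 * lock_put --
 *	Release a single lock, then run the deadlock detector if a run was
 *	scheduled.
 */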
int
lock_put(lt, lock)
	DB_LOCKTAB *lt;
	DB_LOCK lock;
{
	struct __db_lock *lockp;
	int ret, run_dd;

	LOCK_PANIC_CHECK(lt);

	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) != 0) {
		/* Don't return while still holding the region lock. */
		UNLOCK_LOCKREGION(lt);
		return (ret);
	}

	lockp = OFFSET_TO_LOCK(lt, lock);
	ret = __lock_put_internal(lt, lockp, 0);

	__lock_checklocker(lt, lockp, 0);

	if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
		run_dd = 1;
		lt->region->need_dd = 0;
	} else
		run_dd = 0;

	UNLOCK_LOCKREGION(lt);

	if (ret == 0 && run_dd)
		lock_detect(lt, 0, lt->region->detect);

	return (ret);
}

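/*
 * __lock_put_internal --
 *	Release a lock: decrement its reference count and, when the count
 *	reaches zero (or do_all is set), remove it from its locker and object
 *	lists, promote any waiters, and reclaim the object if it is no longer
 *	referenced.
 */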
static int
__lock_put_internal(lt, lockp, do_all)
	DB_LOCKTAB *lt;
	struct __db_lock *lockp;
	int do_all;
{
	DB_LOCKOBJ *sh_obj;
	int state_changed;

	if (lockp->refcount == 0 || (lockp->status != DB_LSTAT_HELD &&
	    lockp->status != DB_LSTAT_WAITING) || lockp->obj == 0) {
		__db_err(lt->dbenv, "lock_put: invalid lock %lu",
		    (u_long)((u_int8_t *)lockp - (u_int8_t *)lt->region));
		return (EINVAL);
	}

	if (do_all)
		lt->region->nreleases += lockp->refcount;
	else
		lt->region->nreleases++;
	if (do_all == 0 && lockp->refcount > 1) {
		lockp->refcount--;
		return (0);
	}

	/* Get the object associated with this lock. */
	sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);

	/* Remove the lock from its locker's list. */
	SH_LIST_REMOVE(lockp, locker_links, __db_lock);

	/* Remove this lock from its holders/waiters list. */
	if (lockp->status != DB_LSTAT_HELD)
		__lock_remove_waiter(lt, sh_obj, lockp, DB_LSTAT_FREE);
	else
		SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock);

	state_changed = __lock_promote(lt, sh_obj);

	/* Check if the object should be reclaimed. */
	if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) {
		HASHREMOVE_EL(lt->hashtab, __db_lockobj,
		    links, sh_obj, lt->region->table_size, __lock_lhash);
		if (sh_obj->lockobj.size > sizeof(sh_obj->objdata))
			__db_shalloc_free(lt->mem,
			    SH_DBT_PTR(&sh_obj->lockobj));
		SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, sh_obj, links,
		    __db_lockobj);
		state_changed = 1;
	}

	/* Free the lock. */
	lockp->status = DB_LSTAT_FREE;
	SH_TAILQ_INSERT_HEAD(&lt->region->free_locks, lockp, links, __db_lock);

	/*
	 * If we did not promote anyone, we need to run the deadlock
	 * detector again.
	 */
	if (state_changed == 0)
		lt->region->need_dd = 1;

	return (0);
}

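/*
 * __lock_get_internal --
 *	Acquire a lock using FIFO ordering: grant immediately if the request
 *	conflicts with no holder or waiter (or only with other locks held by
 *	the same locker or its ancestor transactions); otherwise queue the
 *	request and block on the lock's mutex until it is granted, aborted
 *	by the deadlock detector, or rejected.
 */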
static int
__lock_get_internal(lt, locker, txn, flags, obj, lock_mode, lockp)
	DB_LOCKTAB *lt;
	u_int32_t locker, flags;
	DB_TXN *txn;
	const DBT *obj;
	db_lockmode_t lock_mode;
	struct __db_lock **lockp;
{
	struct __db_lock *newl, *lp;
	DB_LOCKOBJ *sh_obj, *sh_locker;
	DB_LOCKREGION *lrp;
	size_t newl_off;
	int ihold, no_dd, ret;

	no_dd = ret = 0;

	/*
	 * Check that the lock mode is valid.
	 */
	lrp = lt->region;
	if ((u_int32_t)lock_mode >= lrp->nmodes) {
		__db_err(lt->dbenv,
		    "lock_get: invalid lock mode %lu\n", (u_long)lock_mode);
		return (EINVAL);
	}

	/* Allocate a new lock.  Optimize for the common case of a grant. */
	if ((newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock)) == NULL) {
		if ((ret = __lock_grow_region(lt, DB_LOCK_LOCK, 0)) != 0)
			return (ret);
		lrp = lt->region;
		newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
	}
	newl_off = LOCK_TO_OFFSET(lt, newl);

	/* Optimize for the common case of granting a lock. */
	SH_TAILQ_REMOVE(&lrp->free_locks, newl, links, __db_lock);

	newl->mode = lock_mode;
	newl->status = DB_LSTAT_HELD;
	newl->holder = locker;
	newl->refcount = 1;

	if ((ret = __lock_getobj(lt, 0, obj, DB_LOCK_OBJTYPE, &sh_obj)) != 0) {
		/*
		 * Return the unused lock to the free list; re-find it by
		 * offset in case the region was grown (and remapped).
		 */
		lrp = lt->region;
		newl = OFFSET_TO_LOCK(lt, newl_off);
		newl->status = DB_LSTAT_FREE;
		SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
		return (ret);
	}

	lrp = lt->region;		/* getobj might have grown. */
	newl = OFFSET_TO_LOCK(lt, newl_off);

	/* Now make the new lock point to its object. */
	newl->obj = SH_PTR_TO_OFF(newl, sh_obj);

	/*
	 * Now we have a lock and an object and we need to see if we should
	 * grant the lock.  We use a FIFO ordering so we can only grant a
	 * new lock if it does not conflict with anyone on the holders list
	 * OR anyone on the waiters list.  The reason that we don't grant if
	 * there's a conflict is that this can lead to starvation (a writer
	 * waiting on a popularly read item will never be granted).  The
	 * downside of this is that a waiting reader can prevent an upgrade
	 * from reader to writer, which is not uncommon.
	 *
	 * There is one exception to the no-conflict rule.  If a lock is held
	 * by the requesting locker AND the new lock does not conflict with
	 * any other holders, then we grant the lock.  The most common place
	 * this happens is when the holder has a WRITE lock and a READ lock
	 * request comes in for the same locker.  If we do not grant the read
	 * lock, then we guarantee deadlock.
	 *
	 * In case of conflict, we put the new lock on the end of the waiters
	 * list, unless we are upgrading, in which case the locker goes on the
	 * front of the list.
	 */
	ihold = 0;
	for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
	    lp != NULL;
	    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
		if (locker == lp->holder ||
		    __lock_is_parent(lp->holder, txn)) {
			if (lp->mode == lock_mode &&
			    lp->status == DB_LSTAT_HELD) {
				if (LF_ISSET(DB_LOCK_UPGRADE))
					goto upgrade;

				/*
				 * Lock is held, so we can increment the
				 * reference count and return this lock.
				 */
				lp->refcount++;
				*lockp = lp;
				SH_TAILQ_INSERT_HEAD(&lrp->free_locks,
				    newl, links, __db_lock);
				return (0);
			} else
				ihold = 1;
		} else if (CONFLICTS(lt, lp->mode, lock_mode))
			break;
	}

	/*
	 * If we are upgrading, then there are two scenarios.  Either we had
	 * no conflicts, so we can do the upgrade, or there is a conflict and
	 * we should wait at the HEAD of the waiters list.
	 */
	if (LF_ISSET(DB_LOCK_UPGRADE)) {
		if (lp == NULL)
			goto upgrade;

		/* There was a conflict, wait. */
		SH_TAILQ_INSERT_HEAD(&sh_obj->waiters, newl, links, __db_lock);
		goto wait;
	}

	if (lp == NULL && !ihold)
		for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
		    lp != NULL;
		    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
			if (CONFLICTS(lt, lp->mode, lock_mode) &&
			    locker != lp->holder)
				break;
		}
	if (lp == NULL)
		SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links);
	else if (!(flags & DB_LOCK_NOWAIT))
		SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links);
	else {
		/* Free the lock and return an error. */
		newl->status = DB_LSTAT_FREE;
		SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
		return (DB_LOCK_NOTGRANTED);
	}

	/*
	 * Now, insert the lock onto its locker's list.  If the locker does
	 * not currently hold any locks, there's no reason to run the
	 * deadlock detector; save that information.
	 */
	if ((ret =
	    __lock_getobj(lt, locker, NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
		return (ret);
	no_dd = SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL;

	lrp = lt->region;
	SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock);

	if (lp != NULL) {
		/*
		 * This is really a blocker for the process, so initialize
		 * the mutex in its locked state.  That way the current
		 * process will block when it tries to acquire it below, and
		 * the waking process will release it.
		 */
wait:		(void)__db_mutex_init(&newl->mutex,
		    MUTEX_LOCK_OFFSET(lt->region, &newl->mutex));
		(void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);

		newl->status = DB_LSTAT_WAITING;
		lrp->nconflicts++;

		/*
		 * We are about to wait; we must release the region mutex.
		 * Then, when we wake up, we need to reacquire the region
		 * mutex before continuing.
		 */
		if (lrp->detect == DB_LOCK_NORUN)
			lt->region->need_dd = 1;
		UNLOCK_LOCKREGION(lt);

		/*
		 * We are about to wait; before waiting, see if the deadlock
		 * detector should be run.
		 */
		if (lrp->detect != DB_LOCK_NORUN && !no_dd)
			(void)lock_detect(lt, 0, lrp->detect);

		(void)__db_mutex_lock(&newl->mutex, lt->reginfo.fd);

		LOCK_LOCKREGION(lt);
		if (newl->status != DB_LSTAT_PENDING) {
			/*
			 * If this lock errored due to a deadlock, then
			 * we have waiters that require promotion.
			 */
			if (newl->status == DB_LSTAT_ABORTED)
				(void)__lock_promote(lt, sh_obj);
			/* Return the lock to the free list. */
			__lock_checklocker(lt, newl, 0);
			SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
			    __db_lock);
			switch (newl->status) {
			case DB_LSTAT_ABORTED:
				ret = DB_LOCK_DEADLOCK;
				break;
			case DB_LSTAT_NOGRANT:
				ret = DB_LOCK_NOTGRANTED;
				break;
			default:
				ret = EINVAL;
				break;
			}
			newl->status = DB_LSTAT_FREE;
			newl = NULL;
		} else if (LF_ISSET(DB_LOCK_UPGRADE)) {
			/*
			 * The lock that was just granted got put on the
			 * holders list.  Since we're upgrading some other
			 * lock, we've got to remove it here.
			 */
			SH_TAILQ_REMOVE(&sh_obj->holders,
			    newl, links, __db_lock);
			goto upgrade;
		} else
			newl->status = DB_LSTAT_HELD;
	}

	*lockp = newl;
	return (ret);

upgrade:
	/*
	 * This was an upgrade, so return the new lock to the free list and
	 * upgrade the mode.
	 */
	(*lockp)->mode = lock_mode;
	newl->status = DB_LSTAT_FREE;
	SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
	return (0);
}

/*
 * __lock_is_locked --
 *	Return 1 if the locker holds a lock of the given mode on the object.
 *
 * PUBLIC: int __lock_is_locked
 * PUBLIC:    __P((DB_LOCKTAB *, u_int32_t, DBT *, db_lockmode_t));
 */
int
__lock_is_locked(lt, locker, dbt, mode)
	DB_LOCKTAB *lt;
	u_int32_t locker;
	DBT *dbt;
	db_lockmode_t mode;
{
	struct __db_lock *lp;
	DB_LOCKOBJ *sh_obj;
	DB_LOCKREGION *lrp;

	lrp = lt->region;

	/* Look up the object in the hash table. */
	HASHLOOKUP(lt->hashtab, __db_lockobj, links,
	    dbt, sh_obj, lrp->table_size, __lock_ohash, __lock_cmp);
	if (sh_obj == NULL)
		return (0);

	/* Walk the list of holders looking for a match. */
	for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
	    lp != NULL;
	    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
		if (lp->holder == locker && lp->mode == mode)
			return (1);
	}

	return (0);
}

/*
 * __lock_printlock --
 *	Print the holder, mode, status and object of a single lock.
 *
 * PUBLIC: void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
 */
void
__lock_printlock(lt, lp, ispgno)
	DB_LOCKTAB *lt;
	struct __db_lock *lp;
	int ispgno;
{
	DB_LOCKOBJ *lockobj;
	db_pgno_t pgno;
	size_t obj;
	u_int8_t *ptr;
	const char *mode, *status;

	switch (lp->mode) {
	case DB_LOCK_IREAD:
		mode = "IREAD";
		break;
	case DB_LOCK_IWR:
		mode = "IWR";
		break;
	case DB_LOCK_IWRITE:
		mode = "IWRITE";
		break;
	case DB_LOCK_NG:
		mode = "NG";
		break;
	case DB_LOCK_READ:
		mode = "READ";
		break;
	case DB_LOCK_WRITE:
		mode = "WRITE";
		break;
	default:
		mode = "UNKNOWN";
		break;
	}
	switch (lp->status) {
	case DB_LSTAT_ABORTED:
		status = "ABORT";
		break;
	case DB_LSTAT_ERR:
		status = "ERROR";
		break;
	case DB_LSTAT_FREE:
		status = "FREE";
		break;
	case DB_LSTAT_HELD:
		status = "HELD";
		break;
	case DB_LSTAT_NOGRANT:
		status = "NONE";
		break;
	case DB_LSTAT_WAITING:
		status = "WAIT";
		break;
	case DB_LSTAT_PENDING:
		status = "PENDING";
		break;
	default:
		status = "UNKNOWN";
		break;
	}
	printf("\t%lx\t%s\t%lu\t%s\t",
	    (u_long)lp->holder, mode, (u_long)lp->refcount, status);

	lockobj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
	ptr = SH_DBT_PTR(&lockobj->lockobj);
	if (ispgno) {
		/* Assume this is a lock whose object data is a page number. */
		memcpy(&pgno, ptr, sizeof(db_pgno_t));
		printf("page %lu\n", (u_long)pgno);
	} else {
		obj = (u_int8_t *)lp + lp->obj - (u_int8_t *)lt->region;
		printf("0x%lx ", (u_long)obj);
		__db_pr(ptr, lockobj->lockobj.size);
		printf("\n");
	}
}

/*
 * __lock_getobj --
 *	Look up an object or locker in the hash table, creating it if it
 *	does not yet exist.
 *
 * PUBLIC: int __lock_getobj __P((DB_LOCKTAB *,
 * PUBLIC:     u_int32_t, const DBT *, u_int32_t type, DB_LOCKOBJ **));
 */
int
__lock_getobj(lt, locker, dbt, type, objp)
	DB_LOCKTAB *lt;
	u_int32_t locker, type;
	const DBT *dbt;
	DB_LOCKOBJ **objp;
{
	DB_LOCKREGION *lrp;
	DB_LOCKOBJ *sh_obj;
	u_int32_t obj_size;
	int ret;
	void *p, *src;

	lrp = lt->region;

	/* Look up the object in the hash table. */
	if (type == DB_LOCK_OBJTYPE) {
		HASHLOOKUP(lt->hashtab, __db_lockobj, links, dbt, sh_obj,
		    lrp->table_size, __lock_ohash, __lock_cmp);
		obj_size = dbt->size;
	} else {
		HASHLOOKUP(lt->hashtab, __db_lockobj, links, locker,
		    sh_obj, lrp->table_size, __lock_locker_hash,
		    __lock_locker_cmp);
		obj_size = sizeof(locker);
	}

	/*
	 * If we found the object, then we can just return it.  If
	 * we didn't find the object, then we need to create it.
	 */
	if (sh_obj == NULL) {
		/* Create new object and then insert it into hash table. */
		if ((sh_obj =
		    SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj)) == NULL) {
			if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0)
				return (ret);
			lrp = lt->region;
			sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
		}

		/*
		 * If we can fit this object in the structure, do so instead
		 * of shalloc-ing space for it.
		 */
		if (obj_size <= sizeof(sh_obj->objdata))
			p = sh_obj->objdata;
		else
			if ((ret =
			    __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
				if ((ret = __lock_grow_region(lt,
				    DB_LOCK_MEM, obj_size)) != 0)
					return (ret);
				lrp = lt->region;
				/* Reacquire the head of the list. */
				sh_obj = SH_TAILQ_FIRST(&lrp->free_objs,
				    __db_lockobj);
				(void)__db_shalloc(lt->mem, obj_size, 0, &p);
			}

		src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker;
		memcpy(p, src, obj_size);

		sh_obj->type = type;
		SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj);

		SH_TAILQ_INIT(&sh_obj->waiters);
		if (type == DB_LOCK_LOCKER)
			SH_LIST_INIT(&sh_obj->heldby);
		else
			SH_TAILQ_INIT(&sh_obj->holders);
		sh_obj->lockobj.size = obj_size;
		sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p);

		HASHINSERT(lt->hashtab,
		    __db_lockobj, links, sh_obj, lrp->table_size, __lock_lhash);

		if (type == DB_LOCK_LOCKER)
			lrp->nlockers++;
	}

	*objp = sh_obj;
	return (0);
}

/*
 * __lock_remove_waiter --
 *	Any lock on the waiters list has a process waiting for it.  Therefore,
 *	we can't return the lock to the free list immediately.  Instead, we
 *	remove the lock from the list of waiters, set the status field of the
 *	lock, and then let the process waking up return the lock to the
 *	free list.
 */
static void
__lock_remove_waiter(lt, sh_obj, lockp, status)
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *sh_obj;
	struct __db_lock *lockp;
	db_status_t status;
{
	SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
	lockp->status = status;

	/* Wake whoever is waiting on this lock. */
	(void)__db_mutex_unlock(&lockp->mutex, lt->reginfo.fd);
}

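/*
 * __lock_checklocker --
 *	If a locker no longer holds any locks, free its locker object.
 */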
static void
__lock_checklocker(lt, lockp, do_remove)
	DB_LOCKTAB *lt;
	struct __db_lock *lockp;
	int do_remove;
{
	DB_LOCKOBJ *sh_locker;

	if (do_remove)
		SH_LIST_REMOVE(lockp, locker_links, __db_lock);

	/* If the locker's list of held locks is empty, free up the object. */
	if (__lock_getobj(lt, lockp->holder, NULL, DB_LOCK_LOCKER, &sh_locker)
	    == 0 && SH_LIST_FIRST(&sh_locker->heldby, __db_lock) == NULL) {
		__lock_freeobj(lt, sh_locker);
		lt->region->nlockers--;
	}
}

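/*
 * __lock_freeobj --
 *	Remove an object from the hash table and return it, along with any
 *	separately allocated object data, to the free lists.
 */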
static void
__lock_freeobj(lt, obj)
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *obj;
{
	HASHREMOVE_EL(lt->hashtab,
	    __db_lockobj, links, obj, lt->region->table_size, __lock_lhash);
	if (obj->lockobj.size > sizeof(obj->objdata))
		__db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
	SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, obj, links, __db_lockobj);
}

/*
 * __lock_downgrade --
 *	Used by the concurrent access product to downgrade write locks
 *	back to iwrite locks.
 *
 * PUBLIC: int __lock_downgrade __P((DB_LOCKTAB *,
 * PUBLIC:     DB_LOCK, db_lockmode_t, u_int32_t));
 */
int
__lock_downgrade(lt, lock, new_mode, flags)
	DB_LOCKTAB *lt;
	DB_LOCK lock;
	db_lockmode_t new_mode;
	u_int32_t flags;
{
	struct __db_lock *lockp;
	DB_LOCKOBJ *obj;
	int ret;

	COMPQUIET(flags, 0);
	LOCK_PANIC_CHECK(lt);
	LOCK_LOCKREGION(lt);

	if ((ret = __lock_validate_region(lt)) == 0) {
		lockp = OFFSET_TO_LOCK(lt, lock);
		lockp->mode = new_mode;

		/* Get the object associated with this lock. */
		obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
		(void)__lock_promote(lt, obj);
		++lt->region->nreleases;
	}

	UNLOCK_LOCKREGION(lt);

	return (ret);
}

/*
 * __lock_promote --
 *	Look through the waiters and holders lists and decide which (if any)
 *	locks can be promoted.  Promote any that are eligible.
 */
static int
__lock_promote(lt, obj)
	DB_LOCKTAB *lt;
	DB_LOCKOBJ *obj;
{
	struct __db_lock *lp_w, *lp_h, *next_waiter;
	int state_changed, waiter_is_txn;

	/*
	 * We need to do lock promotion.  We also need to determine if we're
	 * going to need to run the deadlock detector again.  If we release
	 * locks, and there are waiters, but no one gets promoted, then we
	 * haven't fundamentally changed the lockmgr state, so we may still
	 * have a deadlock and we have to run again.  However, if there were
	 * no waiters, or we actually promoted someone, then we are OK and
	 * we don't have to run it immediately.
	 *
	 * During promotion, we look for state changes so we can return
	 * this information to the caller.
	 */
	for (lp_w = SH_TAILQ_FIRST(&obj->waiters, __db_lock),
	    state_changed = lp_w == NULL;
	    lp_w != NULL;
	    lp_w = next_waiter) {
		waiter_is_txn = TXN_IS_HOLDING(lp_w);
		next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock);
		for (lp_h = SH_TAILQ_FIRST(&obj->holders, __db_lock);
		    lp_h != NULL;
		    lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) {
			if (CONFLICTS(lt, lp_h->mode, lp_w->mode) &&
			    lp_h->holder != lp_w->holder &&
			    !(waiter_is_txn &&
			    TXN_IS_HOLDING(lp_h) &&
			    __txn_is_ancestor(lt->dbenv->tx_info,
			    lp_h->txnoff, lp_w->txnoff)))
				break;
		}
		if (lp_h != NULL)	/* Found a conflict. */
			break;

		/* No conflict, promote the waiting lock. */
		SH_TAILQ_REMOVE(&obj->waiters, lp_w, links, __db_lock);
		lp_w->status = DB_LSTAT_PENDING;
		SH_TAILQ_INSERT_TAIL(&obj->holders, lp_w, links);

		/* Wake up the waiter. */
		(void)__db_mutex_unlock(&lp_w->mutex, lt->reginfo.fd);
		state_changed = 1;
	}

	return (state_changed);
}

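/*
 * __lock_is_parent --
 *	Return 1 if the locker ID belongs to an ancestor of the given
 *	transaction.
 */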
static int
__lock_is_parent(locker, txn)
	u_int32_t locker;
	DB_TXN *txn;
{
	DB_TXN *t;

	if (txn == NULL)
		return (0);

	for (t = txn->parent; t != NULL; t = t->parent)
		if (t->txnid == locker)
			return (1);

	return (0);
}