/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>
#include <sys/sysctl.h>

#include <vm/uma.h>

/*
 * Immediately after initialization (subject to 'rangelock_cheat'
 * below), and until a request arrives that conflicts with the
 * already-granted ones based on type, rangelocks serve requests in
 * the "cheating" mode.  In this mode, a rangelock behaves like an
 * sxlock, as if each request covered the whole range of the protected
 * object.  On receiving a conflicting request (any request while a
 * write request is effective, or any write request while some read
 * requests are effective), all requests granted in "cheating" mode
 * are drained, and the rangelock then switches to tracking the
 * precise range of each new request.
 *
 * The normal sx(9) implementation is not used in order to avoid
 * bloating the structures (most importantly, vnodes) that embed
 * rangelocks.
 *
 * Cheating greatly helps the very common pattern where a file is
 * first written single-threaded and then read by many processes.
 *
 * A lock is in cheat mode when the RL_CHEAT_CHEATING bit is set in
 * lock->head.  Special cookies are returned in this mode, and
 * trylocks are the same as normal locks but do not drain.
 */
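
/*
 * Usage sketch (a hypothetical consumer; 'obj' is assumed to embed a
 * struct rangelock as obj->rl): the single-writer-then-many-readers
 * pattern is served entirely in cheat mode, each acquisition being a
 * single atomic update of lock->head:
 *
 *	void *cookie;
 *
 *	cookie = rangelock_wlock(&obj->rl, 0, OFF_MAX);
 *	 ... populate the file ...
 *	rangelock_unlock(&obj->rl, cookie);
 *
 *	cookie = rangelock_rlock(&obj->rl, off, off + len);
 *	 ... read the range ...
 *	rangelock_unlock(&obj->rl, cookie);
 */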

static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
    &rangelock_cheat, 0,
    "");

#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1
/* #define	RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2
#define	RL_CHEAT_DRAINING	0x4

#define	RL_CHEAT_READER		0x8

#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200
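
/*
 * Example encodings of lock->head in cheat mode, for illustration: an
 * idle cheating lock is RL_CHEAT_CHEATING (0x1); a write-locked one is
 * RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING (0x3); read-locked ones count
 * readers in multiples of RL_CHEAT_READER, so three readers give
 * 3 * RL_CHEAT_READER | RL_CHEAT_CHEATING == 0x19.
 */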

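/*
 * Wait until the draining of the cheat-mode users finishes.  Called
 * with the sleepqueue chain lock for &lock->head held; the lock is
 * dropped on return.
 */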
static void
rangelock_cheat_drain(struct rangelock *lock)
{
	uintptr_t v;

	DROP_GIANT();
	for (;;) {
		v = atomic_load_ptr(&lock->head);
		if ((v & RL_CHEAT_DRAINING) == 0)
			break;
		sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
		sleepq_wait(&lock->head, PRI_USER);
		sleepq_lock(&lock->head);
	}
	sleepq_release(&lock->head);
	PICKUP_GIANT();
}

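/*
 * Try to serve a lock request in cheat mode.  Returns false if the
 * lock is not (or is no longer) cheating and the caller must fall
 * back to the list-based implementation.  Returns true if the request
 * was handled here, with *cookie set to a special cheat cookie, or to
 * NULL for a failed trylock.
 */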
static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		rangelock_cheat_drain(lock);
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

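/*
 * Release a cheat-mode cookie.  Returns false if the lock is not in
 * cheat mode, in which case the cookie is an rl_q_entry and must be
 * released by the caller through the list-based path.  When the lock
 * is draining and the last cheat-mode user leaves, the waiters are
 * woken up and the lock switches out of cheat mode.
 */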
static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(
						    &lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
	uintptr_t v;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	MPASS(v == RL_CHEAT_CHEATING);
	return (true);
}

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock.  We cannot free an
 * rl_q_entry while inside the smr section, and we cannot reuse the
 * rl_q_next linkage since other threads might still follow it even
 * after a CAS removed the range.  Use rl_q_free for a local list of
 * ranges to free after the smr section is exited.
 */
struct rl_q_entry {
	struct rl_q_entry *rl_q_next;
	struct rl_q_entry *rl_q_free;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
#ifdef INVARIANTS
	struct thread	*rl_q_owner;
#endif
};
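
/*
 * A sketch of the marking convention (see rl_e_mark() and
 * rl_e_is_marked() below): an entry is logically unlocked by setting
 * the low bit of its rl_q_next pointer, so for an entry 'e' with
 * successor 'n':
 *
 *	granted:	e->rl_q_next == n
 *	unlocked:	e->rl_q_next == (struct rl_q_entry *)
 *			    ((uintptr_t)n | 1)
 *
 * A marked entry may be unlinked and queued for freeing by any list
 * walker; the zone alignment guarantees that the low bit of a real
 * pointer is zero.
 */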

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);

static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;

	e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}

void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, rangelock_cheat ? RL_CHEAT_CHEATING : 0);
}

void
rangelock_destroy(struct rangelock *lock)
{
	MPASS(!lock->sleepers);
	if (!rangelock_cheat_destroy(lock))
		rangelock_noncheating_destroy(lock);
	DEBUG_POISON_POINTER(*(void **)&lock->head);
}

static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	return (((uintptr_t)e & 1) != 0);
}

static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}

static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS) && defined(__LP64__)
	int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

static void
rangelock_free_free(struct rl_q_entry *free)
{
	struct rl_q_entry *x, *xp;

	for (x = free; x != NULL; x = xp) {
		MPASS(!rl_e_is_marked(x));
		xp = x->rl_q_free;
		MPASS(!rl_e_is_marked(xp));
		uma_zfree_smr(rl_entry_zone, x);
	}
}

static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}

/*
 * result: -1 if e1 is entirely before e2, or both locks are readers
 *		and e1 starts before or at e2
 *          1 if e1 is entirely after e2, or both locks are readers
 *		and e1 starts after e2, or e1 is NULL
 *          0 if e1 and e2 overlap and at least one lock is a writer
 */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	return (0);
}
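
/*
 * For illustration: two read requests [0, 10) and [5, 15) do not
 * conflict, and rl_e_compare() merely orders them by start offset
 * (-1 here, since e1 starts first), so both can be linked into the
 * list.  If either of them were a write request, the overlap would
 * yield 0 and make rl_insert() below fail the trylock or sleep.
 */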

static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}

static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	MPASS(!rl_e_is_marked(old));
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}

static void
rangelock_noncheating_destroy(struct rangelock *lock)
{
	struct rl_q_entry *cur, *free, *next, **prev;

	free = NULL;
again:
	smr_enter(rl_smr);
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));

	for (;;) {
		if (cur == NULL)
			break;
		if (rl_e_is_marked(cur))
			goto again;

		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
				cur->rl_q_owner = NULL;
#endif
				cur->rl_q_free = free;
				free = cur;
				cur = next;
				continue;
			}
			smr_exit(rl_smr);
			goto again;
		}

		sleepq_lock(&lock->sleepers);
		if (!rl_e_is_marked(cur)) {
			rl_insert_sleep(lock);
			goto again;
		}
	}
	smr_exit(rl_smr);
	rangelock_free_free(free);
}

enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,
	RL_LOCK_SUCCESS,
	RL_LOCK_RETRY,
};

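/*
 * Validate a just-inserted read request e: scan the entries following
 * e that overlap its range, unlinking unlocked (marked) entries and
 * skipping other readers.  If a still-granted write request overlaps,
 * drop e and either fail the trylock or sleep and ask for a retry.
 */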
static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		if (cur == NULL || cur->rl_q_start >= e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (rl_e_is_rlock(cur)) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
			sleepq_lock(&lock->sleepers);
			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			rangelock_unlock_int(lock, e);
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			return (RL_LOCK_RETRY);
		}
	}
}

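/*
 * Validate a just-inserted write request e: walk the list from the
 * head, unlinking unlocked (marked) entries and skipping ranges that
 * end before e starts.  Reaching e means that no conflicting granted
 * range precedes it; otherwise drop e and either fail the trylock or
 * sleep and ask for a retry.
 */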
static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
	for (;;) {
		if (cur == e)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (cur->rl_q_end <= e->rl_q_start) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		sleepq_lock(&lock->sleepers);
		/* Reload after sleepq is locked */
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			sleepq_release(&lock->sleepers);
			goto again;
		}
		rangelock_unlock_int(lock, e);
		if (trylock) {
			sleepq_release(&lock->sleepers);
			return (RL_TRYLOCK_FAILED);
		}
		rl_insert_sleep(lock);
		return (RL_LOCK_RETRY);
	}
}

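/*
 * Insert the request e into the list of granted ranges kept sorted by
 * the start offset, unlinking unlocked (marked) entries along the
 * way.  On meeting a conflicting granted range, fail the trylock or
 * sleep and retry.  Once e is linked in, hand off to rl_r_validate()
 * or rl_w_validate() to check for conflicts that raced with the
 * insertion.
 */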
static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	if (cur == NULL && rl_q_cas(prev, NULL, e))
		return (RL_LOCK_SUCCESS);

	for (;;) {
		if (cur != NULL) {
			if (rl_e_is_marked(cur))
				goto again;

			next = rl_q_load(&cur->rl_q_next);
			if (rl_e_is_marked(next)) {
				next = rl_e_unmark(next);
				if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
					cur->rl_q_owner = NULL;
#endif
					cur->rl_q_free = *free;
					*free = cur;
					cur = next;
					continue;
				}
				goto again;
			}
		}

		MPASS(!rl_e_is_marked(cur));
		r = rl_e_compare(cur, e);
		if (r == -1) {
			prev = &cur->rl_q_next;
			cur = rl_q_load(prev);
		} else if (r == 0) {
			sleepq_lock(&lock->sleepers);
			if (__predict_false(rl_e_is_marked(rl_q_load(
			    &cur->rl_q_next)))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			/* e is still valid */
			goto again;
		} else /* r == 1 */ {
			e->rl_q_next = cur;
			if (rl_q_cas(prev, cur, e)) {
				atomic_thread_fence_acq();
				return (rl_e_is_rlock(e) ?
				    rl_r_validate(lock, e, trylock, free) :
				    rl_w_validate(lock, e, trylock, free));
			}
			/* Reset rl_q_next in case we hit fast path. */
			e->rl_q_next = NULL;
			cur = rl_q_load(prev);
		}
	}
}

static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
    vm_ooffset_t end, int locktype)
{
	struct rl_q_entry *e, *free;
	void *cookie;
	enum RL_INSERT_RES res;

	if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
		return (cookie);
	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
		free = NULL;
		e = rlqentry_alloc(start, end, locktype);
		smr_enter(rl_smr);
		res = rl_insert(lock, e, trylock, &free);
		smr_exit(rl_smr);
		if (res == RL_TRYLOCK_FAILED) {
			MPASS(trylock);
			e->rl_q_free = free;
			free = e;
			e = NULL;
		}
		rangelock_free_free(free);
	}
	return (e);
}

void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}

void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}

void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}

void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}

/*
 * If the caller asserts that it may hold several range locks on the
 * same lock simultaneously, switch to the non-cheat mode.  Cheat mode
 * cannot handle such recursion, hanging in the drain or spinning in
 * trylock retries.
 */
void
rangelock_may_recurse(struct rangelock *lock)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return;

	sleepq_lock(&lock->head);
	for (;;) {
		if ((v & RL_CHEAT_CHEATING) == 0) {
			sleepq_release(&lock->head);
			return;
		}

		/* Cheating and locked, drain. */
		if ((v & RL_CHEAT_WLOCKED) != 0 ||
		    (v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER) {
			x = v | RL_CHEAT_DRAINING;
			if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
				rangelock_cheat_drain(lock);
				return;
			}
			continue;
		}

		/* Cheating and unlocked, clear RL_CHEAT_CHEATING. */
		x = 0;
		if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
			sleepq_release(&lock->head);
			return;
		}
	}
}
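
/*
 * Usage sketch (hypothetical caller): a consumer that may take a read
 * lock on one range and, while still holding it, a write lock on a
 * disjoint range of the same object should call
 *
 *	rangelock_may_recurse(&obj->rl);
 *
 * before the first acquisition, so that the overlapping-lifetime
 * requests are tracked by precise ranges instead of the sx-like cheat
 * state.
 */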

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif	/* INVARIANT_SUPPORT */

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
	struct rangelock *lock;
	struct rl_q_entry *e, *x;
	uintptr_t v;

	if (!have_addr) {
		db_printf("show rangelock addr\n");
		return;
	}

	lock = (struct rangelock *)addr;
	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	v = lock->head;
	if ((v & RL_CHEAT_CHEATING) != 0) {
		db_printf("  cheating head %#jx\n", (uintmax_t)v);
		return;
	}
	for (e = (struct rl_q_entry *)(lock->head);;) {
		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
		if (x == NULL)
			break;
		db_printf("  entry %p marked %d %d start %#jx end %#jx "
		    "flags %x next %p",
		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
		db_printf(" owner %p (%d)", x->rl_q_owner,
		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
		db_printf("\n");
		e = x->rl_q_next;
	}
}

#endif	/* DDB */