/*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2015 by Delphix. All rights reserved.
 */

#include <sys/dmu_tx.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/vdev_indirect_births.h>

#ifdef ZFS_DEBUG
/*
 * Sanity-check an open births handle.  Every invariant is checked by its
 * own ASSERT/EQUIV, so the function itself only ever returns B_TRUE; the
 * boolean return exists so that callers can write
 * ASSERT(vdev_indirect_births_verify(vib)).
 */
static boolean_t
vdev_indirect_births_verify(vdev_indirect_births_t *vib)
{
	ASSERT(vib != NULL);

	/* The object id, objset, phys pointer and dbuf must all be set. */
	ASSERT(vib->vib_object != 0);
	ASSERT(vib->vib_objset != NULL);
	ASSERT(vib->vib_phys != NULL);
	ASSERT(vib->vib_dbuf != NULL);

	/* The in-core entry array exists exactly when the count is nonzero. */
	EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL);

	return (B_TRUE);
}
#else
/*
 * Without ZFS_DEBUG the check collapses to B_TRUE.  The argument is only
 * named inside sizeof, so it is referenced but never evaluated.
 */
#define	vdev_indirect_births_verify(vib) ((void) sizeof (vib), B_TRUE)
#endif

/*
 * Return the number of entries in the births object, as recorded in the
 * vib_count field of its phys structure.
 */
uint64_t
vdev_indirect_births_count(vdev_indirect_births_t *vib)
{
	uint64_t nentries;

	ASSERT(vdev_indirect_births_verify(vib));

	nentries = vib->vib_phys->vib_count;
	return (nentries);
}

/*
 * Return the object number that this handle was opened on.
 */
uint64_t
vdev_indirect_births_object(vdev_indirect_births_t *vib)
{
	uint64_t object;

	ASSERT(vdev_indirect_births_verify(vib));

	object = vib->vib_object;
	return (object);
}

/*
 * Return the size, in bytes, of an entry array holding the number of
 * entries currently recorded in vib_count.
 */
static uint64_t
vdev_indirect_births_size_impl(vdev_indirect_births_t *vib)
{
	const uint64_t nentries = vib->vib_phys->vib_count;
	const uint64_t entry_size = sizeof (*vib->vib_entries);

	return (nentries * entry_size);
}

/*
 * Tear down a handle returned by vdev_indirect_births_open(): free the
 * in-core entry array (if any), release the hold on the bonus buffer, and
 * free the handle itself.  The caller must not use `vib' afterwards.
 */
void
vdev_indirect_births_close(vdev_indirect_births_t *vib)
{
	ASSERT(vdev_indirect_births_verify(vib));

	/* An entry array is only present when there is at least one entry. */
	if (vib->vib_phys->vib_count != 0) {
		uint64_t nbytes = vdev_indirect_births_size_impl(vib);

		vmem_free(vib->vib_entries, nbytes);
		vib->vib_entries = NULL;
	}

	/* The handle was used as the hold tag, so it is the release tag too. */
	dmu_buf_rele(vib->vib_dbuf, vib);

	/* Clear every field before the handle's memory is given back. */
	vib->vib_phys = NULL;
	vib->vib_dbuf = NULL;
	vib->vib_object = 0;
	vib->vib_objset = NULL;

	kmem_free(vib, sizeof (vdev_indirect_births_t));
}

/*
 * Allocate a new births object in objset `os' as part of transaction `tx'
 * and return its object number.  Both the object and its bonus buffer use
 * type DMU_OTN_UINT64_METADATA; the block size is SPA_OLD_MAXBLOCKSIZE and
 * the bonus buffer is sized to hold a vdev_indirect_birth_phys_t.
 * The transaction is asserted to be a syncing one.
 */
uint64_t
vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
{
	uint64_t object;

	ASSERT(dmu_tx_is_syncing(tx));

	object = dmu_object_alloc(os,
	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
	    DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
	    tx);

	return (object);
}

/*
 * Open births object `births_object' in objset `os' and return a newly
 * allocated in-core handle for it.  The object's bonus buffer is held,
 * with the handle as the tag, and its data is used as the phys structure.
 * If the object already contains entries, they are all read into a newly
 * allocated array.  A failure to hold the bonus buffer or to read the
 * entries trips VERIFY0.  Release the handle with
 * vdev_indirect_births_close().
 */
vdev_indirect_births_t *
vdev_indirect_births_open(objset_t *os, uint64_t births_object)
{
	vdev_indirect_births_t *vib;

	vib = kmem_zalloc(sizeof (vdev_indirect_births_t), KM_SLEEP);
	vib->vib_object = births_object;
	vib->vib_objset = os;

	VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf));
	vib->vib_phys = vib->vib_dbuf->db_data;

	/* For an empty object vib_entries stays NULL from kmem_zalloc(). */
	if (vib->vib_phys->vib_count != 0) {
		uint64_t births_size = vdev_indirect_births_size_impl(vib);

		vib->vib_entries = vmem_alloc(births_size, KM_SLEEP);
		VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0,
		    births_size, vib->vib_entries, DMU_READ_PREFETCH));
	}

	ASSERT(vdev_indirect_births_verify(vib));

	return (vib);
}

/*
 * Free births object `object' from objset `os' as part of transaction
 * `tx'.  A nonzero return from dmu_object_free() trips VERIFY0.
 */
void
vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
	VERIFY0(dmu_object_free(os, object, tx));
}

/*
 * Append an entry (max_offset, txg) to the births object.
 *
 * The entry is written to the object at the byte offset just past the
 * existing entries, vib_count in the (dirtied) bonus buffer is incremented,
 * and the in-core array is replaced by a copy that is one entry larger.
 * The transaction is asserted to be a syncing one, and the caller is
 * asserted to be in the pool's sync context.
 */
void
vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
    uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
	ASSERT(vdev_indirect_births_verify(vib));

	/* vib_count lives in the bonus buffer, which is modified below. */
	dmu_buf_will_dirty(vib->vib_dbuf, tx);

	vdev_indirect_birth_entry_phys_t vibe;
	vibe.vibe_phys_birth_txg = txg;
	vibe.vibe_offset = max_offset;

	/* The current array size is also the offset of the new entry. */
	const uint64_t prev_bytes = vdev_indirect_births_size_impl(vib);
	dmu_write(vib->vib_objset, vib->vib_object, prev_bytes,
	    sizeof (vibe), &vibe, tx);

	const uint64_t slot = vib->vib_phys->vib_count;
	vib->vib_phys->vib_count = slot + 1;
	const uint64_t grown_bytes = vdev_indirect_births_size_impl(vib);

	/* Build the larger in-core copy, then swap it in for the old one. */
	vdev_indirect_birth_entry_phys_t *grown =
	    vmem_alloc(grown_bytes, KM_SLEEP);
	if (prev_bytes != 0) {
		memcpy(grown, vib->vib_entries, prev_bytes);
		vmem_free(vib->vib_entries, prev_bytes);
	}
	grown[slot] = vibe;
	vib->vib_entries = grown;
}

/*
 * Return the physical birth txg stored in the final entry of the in-core
 * array.  The object is asserted to contain at least one entry.
 */
uint64_t
vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib)
{
	ASSERT(vdev_indirect_births_verify(vib));
	ASSERT(vib->vib_phys->vib_count > 0);

	uint64_t tail = vib->vib_phys->vib_count - 1;

	return (vib->vib_entries[tail].vibe_phys_birth_txg);
}

/*
 * Return the txg in which the given range was copied (i.e. its physical
 * birth txg).  The specified offset+asize must be contiguously mapped
 * (i.e. not a split block).
 *
 * The entries are sorted by increasing phys_birth, and also by increasing
 * offset.  We find the specified offset by binary search.  Note that we
 * can not use bsearch() because looking at each entry independently is
 * insufficient to find the correct entry.  Each entry implicitly relies
 * on the previous entry: an entry indicates that the offsets from the
 * end of the previous entry to the end of this entry were written in the
 * specified txg.
 *
 * The births object must contain at least one entry, and `offset' must be
 * strictly below the last entry's vibe_offset; both are asserted.  If no
 * entry matches, an assertion fires and (uint64_t)-1 is returned.
 */
uint64_t
vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset,
    uint64_t asize)
{
	vdev_indirect_birth_entry_phys_t *base;
	vdev_indirect_birth_entry_phys_t *last;

	ASSERT(vdev_indirect_births_verify(vib));
	ASSERT(vib->vib_phys->vib_count > 0);

	/* [base, last] is the inclusive window of candidate entries. */
	base = vib->vib_entries;
	last = base + vib->vib_phys->vib_count - 1;

	ASSERT3U(offset, <, last->vibe_offset);

	while (last >= base) {
		/* Probe the middle entry of the remaining window. */
		vdev_indirect_birth_entry_phys_t *p =
		    base + ((last - base) / 2);
		if (offset >= p->vibe_offset) {
			/* p ends at or before offset; search to the right. */
			base = p + 1;
		} else if (p == vib->vib_entries ||
		    offset >= (p - 1)->vibe_offset) {
			/*
			 * offset is below p's end, and either p is the first
			 * entry or offset is at or past the previous entry's
			 * end, so p is the entry covering offset.  The whole
			 * range is expected to end within p as well.
			 */
			ASSERT3U(offset + asize, <=, p->vibe_offset);
			return (p->vibe_phys_birth_txg);
		} else {
			/* offset is below the previous entry's end; go left. */
			last = p - 1;
		}
	}
	/* The window emptied without finding an entry covering offset. */
	ASSERT(!"offset not found");
	return (-1);
}

/*
 * When compiled with _KERNEL defined, export every non-static function
 * defined in this file.
 */
#if defined(_KERNEL)
EXPORT_SYMBOL(vdev_indirect_births_add_entry);
EXPORT_SYMBOL(vdev_indirect_births_alloc);
EXPORT_SYMBOL(vdev_indirect_births_close);
EXPORT_SYMBOL(vdev_indirect_births_count);
EXPORT_SYMBOL(vdev_indirect_births_free);
EXPORT_SYMBOL(vdev_indirect_births_last_entry_txg);
EXPORT_SYMBOL(vdev_indirect_births_object);
EXPORT_SYMBOL(vdev_indirect_births_open);
EXPORT_SYMBOL(vdev_indirect_births_physbirth);
#endif