/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
 *
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/endian.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/selinfo.h>
#include <sys/smp.h>
#include <sys/syscall.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/uio.h>
#include <sys/unistd.h>
#include <machine/stdarg.h>

#include <sys/dtrace.h>
#include <sys/dtrace_bsd.h>

#include "fbt.h"

MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

dtrace_provider_id_t	fbt_id;
fbt_probe_t		**fbt_probetab;
int			fbt_probetab_mask;

static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);

static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};

static dtrace_pops_t fbt_pops = {
	.dtps_provide =		NULL,
	.dtps_provide_module =	fbt_provide_module,
	.dtps_enable =		fbt_enable,
	.dtps_disable =		fbt_disable,
	.dtps_suspend =		fbt_suspend,
	.dtps_resume =		fbt_resume,
	.dtps_getargdesc =	fbt_getargdesc,
	.dtps_getargval =	NULL,
	.dtps_usermode =	NULL,
	.dtps_destroy =		fbt_destroy
};

static int			fbt_probetab_size;
static int			fbt_verbose = 0;

int
fbt_excluded(const char *name)
{

	if (strncmp(name, "dtrace_", 7) == 0 &&
	    strncmp(name, "dtrace_safe_", 12) != 0) {
		/*
		 * Anything beginning with "dtrace_" may be called
		 * from probe context unless it explicitly indicates
		 * that it won't be called from probe context by
		 * using the prefix "dtrace_safe_".
		 */
		return (1);
	}

	/*
	 * Omit instrumentation of functions that are probably in DDB.  It
	 * makes it too hard to debug broken FBT.
	 *
	 * NB: kdb_enter() can be excluded, but its call to printf() can't be.
	 * This is generally OK since we're not yet in debugging context.
	 */
	if (strncmp(name, "db_", 3) == 0 ||
	    strncmp(name, "kdb_", 4) == 0)
		return (1);

	/*
	 * Lock owner methods may be called from probe context.
	 */
	if (strcmp(name, "owner_mtx") == 0 ||
	    strcmp(name, "owner_rm") == 0 ||
	    strcmp(name, "owner_rw") == 0 ||
	    strcmp(name, "owner_sx") == 0)
		return (1);

	/*
	 * The KMSAN runtime can't be instrumented safely.
	 */
	if (strncmp(name, "__msan", 6) == 0 ||
	    strncmp(name, "kmsan_", 6) == 0)
		return (1);

	/*
	 * Stack unwinders may be called from probe context on some
	 * platforms.
	 */
#if defined(__aarch64__) || defined(__riscv)
	if (strcmp(name, "unwind_frame") == 0)
		return (1);
#endif

	/*
	 * When DTrace is built into the kernel we need to exclude
	 * the FBT functions from instrumentation.
	 */
#ifndef _KLD_MODULE
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}

static void
fbt_doubletrap(void)
{
	fbt_probe_t *fbt;
	int i;

	for (i = 0; i < fbt_probetab_size; i++) {
		fbt = fbt_probetab[i];

		for (; fbt != NULL; fbt = fbt->fbtp_probenext)
			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
	}
}

static void
fbt_provide_module(void *arg, modctl_t *lf)
{
	char modname[MAXPATHLEN];
	int i;
	size_t len;

	strlcpy(modname, lf->filename, sizeof(modname));
	len = strlen(modname);
	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
		modname[len - 3] = '\0';

	/*
	 * Employees of dtrace and their families are ineligible.  Void
	 * where prohibited.
	 */
	if (strcmp(modname, "dtrace") == 0)
		return;

	/*
	 * To register with DTrace, a module must list 'dtrace' as a
	 * dependency in order for the kernel linker to resolve
	 * symbols like dtrace_register(). All modules with such a
	 * dependency are ineligible for FBT tracing.
	 */
	for (i = 0; i < lf->ndeps; i++)
		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
			return;

	if (lf->fbt_nentries) {
		/*
		 * This module has some FBT entries allocated; we're afraid
		 * to screw with it.
		 */
		return;
	}

	/*
	 * List the functions in the module and the symbol values.
	 */
	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
}

static void
fbt_destroy_one(fbt_probe_t *fbt)
{
	fbt_probe_t *hash, *hashprev, *next;
	int ndx;

	ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
	for (hash = fbt_probetab[ndx], hashprev = NULL; hash != NULL;
	    hashprev = hash, hash = hash->fbtp_hashnext) {
		if (hash == fbt) {
			if ((next = fbt->fbtp_tracenext) != NULL)
				next->fbtp_hashnext = hash->fbtp_hashnext;
			else
				next = hash->fbtp_hashnext;
			if (hashprev != NULL)
				hashprev->fbtp_hashnext = next;
			else
				fbt_probetab[ndx] = next;
			goto free;
		} else if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
			for (next = hash; next->fbtp_tracenext != NULL;
			    next = next->fbtp_tracenext) {
				if (fbt == next->fbtp_tracenext) {
					next->fbtp_tracenext =
					    fbt->fbtp_tracenext;
					goto free;
				}
			}
		}
	}
	panic("probe %p not found in hash table", fbt);
free:
	free(fbt, M_FBT);
}

static void
fbt_destroy(void *arg, dtrace_id_t id, void *parg)
{
	fbt_probe_t *fbt = parg, *next;
	modctl_t *ctl;

	do {
		ctl = fbt->fbtp_ctl;
		ctl->fbt_nentries--;

		next = fbt->fbtp_probenext;
		fbt_destroy_one(fbt);
		fbt = next;
	} while (fbt != NULL);
}

static void
fbt_enable(void *arg, dtrace_id_t id, void *parg)
{
	fbt_probe_t *fbt = parg;
	modctl_t *ctl = fbt->fbtp_ctl;

	ctl->nenabled++;

	/*
	 * Now check that our modctl has the expected load count.  If it
	 * doesn't, this module must have been unloaded and reloaded -- and
	 * we're not going to touch it.
	 */
	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
		if (fbt_verbose) {
			printf("fbt is failing for probe %s "
			    "(module %s reloaded)",
			    fbt->fbtp_name, ctl->filename);
		}

		return;
	}

	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
		fbt->fbtp_enabled++;
	}
}

static void
fbt_disable(void *arg, dtrace_id_t id, void *parg)
{
	fbt_probe_t *fbt = parg, *hash;
	modctl_t *ctl = fbt->fbtp_ctl;

	ASSERT(ctl->nenabled > 0);
	ctl->nenabled--;

	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
		return;

	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
		fbt->fbtp_enabled--;

		for (hash = fbt_probetab[FBT_ADDR2NDX(fbt->fbtp_patchpoint)];
		    hash != NULL; hash = hash->fbtp_hashnext) {
			if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
				for (; hash != NULL; hash = hash->fbtp_tracenext)
					if (hash->fbtp_enabled > 0)
						break;
				break;
			}
		}
		if (hash == NULL)
			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
	}
}

static void
fbt_suspend(void *arg, dtrace_id_t id, void *parg)
{
	fbt_probe_t *fbt = parg;
	modctl_t *ctl = fbt->fbtp_ctl;

	ASSERT(ctl->nenabled > 0);

	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
		return;

	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
}

static void
fbt_resume(void *arg, dtrace_id_t id, void *parg)
{
	fbt_probe_t *fbt = parg;
	modctl_t *ctl = fbt->fbtp_ctl;

	ASSERT(ctl->nenabled > 0);

	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
		return;

	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
}

static int
fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
{
	const Elf_Sym *symp = lc->symtab;
	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
	size_t idwidth;
	int i;
	uint32_t *ctfoff;
	uint32_t objtoff = hp->cth_objtoff;
	uint32_t funcoff = hp->cth_funcoff;
	uint_t kind, info, vlen;

	/* Sanity check. */
	if (hp->cth_magic != CTF_MAGIC) {
		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
		return (EINVAL);
	}

	if (lc->symtab == NULL) {
		printf("No symbol table in '%s'\n",lf->pathname);
		return (EINVAL);
	}

	ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK);
	*lc->ctfoffp = ctfoff;

	idwidth = hp->cth_version == CTF_VERSION_2 ? 2 : 4;

	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
			*ctfoff = 0xffffffff;
			continue;
		}

		switch (ELF_ST_TYPE(symp->st_info)) {
		case STT_OBJECT:
			if (objtoff >= hp->cth_funcoff ||
			    (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
				*ctfoff = 0xffffffff;
				break;
			}

			*ctfoff = objtoff;
			objtoff += idwidth;
			break;

		case STT_FUNC:
			if (funcoff >= hp->cth_typeoff) {
				*ctfoff = 0xffffffff;
				break;
			}

			*ctfoff = funcoff;

			info = 0;
			memcpy(&info, ctfdata + funcoff, idwidth);
			if (hp->cth_version == CTF_VERSION_2) {
				kind = CTF_V2_INFO_KIND(info);
				vlen = CTF_V2_INFO_VLEN(info);
			} else {
				kind = CTF_V3_INFO_KIND(info);
				vlen = CTF_V3_INFO_VLEN(info);
			}

			/*
			 * If we encounter a zero pad at the end, just skip it.
			 * Otherwise skip over the function and its return type
			 * (+2) and the argument list (vlen).
			 */
			if (kind == CTF_K_UNKNOWN && vlen == 0)
				funcoff += idwidth;
			else
				funcoff += idwidth * (vlen + 2);
			break;

		default:
			*ctfoff = 0xffffffff;
			break;
		}
	}

	return (0);
}

static void
fbt_get_ctt_index(uint8_t version, const void *v, uint_t *indexp,
    uint_t *typep, int *ischildp)
{
	uint_t index, type;
	int ischild;

	if (version == CTF_VERSION_2) {
		const struct ctf_type_v2 *ctt = v;

		type = ctt->ctt_type;
		index = CTF_V2_TYPE_TO_INDEX(ctt->ctt_type);
		ischild = CTF_V2_TYPE_ISCHILD(ctt->ctt_type);
	} else {
		const struct ctf_type_v3 *ctt = v;

		type = ctt->ctt_type;
		index = CTF_V3_TYPE_TO_INDEX(ctt->ctt_type);
		ischild = CTF_V3_TYPE_ISCHILD(ctt->ctt_type);
	}

	if (indexp != NULL)
		*indexp = index;
	if (typep != NULL)
		*typep = type;
	if (ischildp != NULL)
		*ischildp = ischild;
}

static ssize_t
fbt_get_ctt_size(uint8_t version, const void *tp, ssize_t *sizep,
    ssize_t *incrementp)
{
	ssize_t size, increment;

	if (version == CTF_VERSION_2) {
		const struct ctf_type_v2 *ctt = tp;

		if (ctt->ctt_size == CTF_V2_LSIZE_SENT) {
			size = CTF_TYPE_LSIZE(ctt);
			increment = sizeof (struct ctf_type_v2);
		} else {
			size = ctt->ctt_size;
			increment = sizeof (struct ctf_stype_v2);
		}
	} else {
		const struct ctf_type_v3 *ctt = tp;

		if (ctt->ctt_size == CTF_V3_LSIZE_SENT) {
			size = CTF_TYPE_LSIZE(ctt);
			increment = sizeof (struct ctf_type_v3);
		} else {
			size = ctt->ctt_size;
			increment = sizeof (struct ctf_stype_v3);
		}
	}

	if (sizep)
		*sizep = size;
	if (incrementp)
		*incrementp = increment;

	return (size);
}

static void
fbt_get_ctt_info(uint8_t version, const void *tp, uint_t *kindp, uint_t *vlenp,
    int *isrootp)
{
	uint_t kind, vlen;
	int isroot;

	if (version == CTF_VERSION_2) {
		const struct ctf_type_v2 *ctt = tp;

		kind = CTF_V2_INFO_KIND(ctt->ctt_info);
		vlen = CTF_V2_INFO_VLEN(ctt->ctt_info);
		isroot = CTF_V2_INFO_ISROOT(ctt->ctt_info);
	} else {
		const struct ctf_type_v3 *ctt = tp;

		kind = CTF_V3_INFO_KIND(ctt->ctt_info);
		vlen = CTF_V3_INFO_VLEN(ctt->ctt_info);
		isroot = CTF_V3_INFO_ISROOT(ctt->ctt_info);
	}

	if (kindp != NULL)
		*kindp = kind;
	if (vlenp != NULL)
		*vlenp = vlen;
	if (isrootp != NULL)
		*isrootp = isroot;
}

static int
fbt_typoff_init(linker_ctf_t *lc)
{
	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
	const void *tbuf, *tend, *tp;
	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
	size_t idwidth;
	int ctf_typemax = 0;
	uint32_t *xp;
	ulong_t pop[CTF_K_MAX + 1] = { 0 };
	uint8_t version;

	/* Sanity check. */
	if (hp->cth_magic != CTF_MAGIC)
		return (EINVAL);

	version = hp->cth_version;
	idwidth = version == CTF_VERSION_2 ? 2 : 4;

	tbuf = (const void *) (ctfdata + hp->cth_typeoff);
	tend = (const void *) (ctfdata + hp->cth_stroff);

	/*
	 * We make two passes through the entire type section.  In this first
	 * pass, we count the number of each type and the total number of types.
	 */
	for (tp = tbuf; tp < tend; ctf_typemax++) {
		uint_t kind, type, vlen;
		ssize_t size, increment;
		size_t vbytes;

		(void) fbt_get_ctt_size(version, tp, &size, &increment);
		fbt_get_ctt_info(version, tp, &kind, &vlen, NULL);
		fbt_get_ctt_index(version, tp, NULL, &type, NULL);

		switch (kind) {
		case CTF_K_INTEGER:
		case CTF_K_FLOAT:
			vbytes = sizeof (uint_t);
			break;
		case CTF_K_ARRAY:
			if (version == CTF_VERSION_2)
				vbytes = sizeof (struct ctf_array_v2);
			else
				vbytes = sizeof (struct ctf_array_v3);
			break;
		case CTF_K_FUNCTION:
			vbytes = roundup2(idwidth * vlen, sizeof(uint32_t));
			break;
		case CTF_K_STRUCT:
		case CTF_K_UNION:
			if (version == CTF_VERSION_2) {
				if (size < CTF_V2_LSTRUCT_THRESH)
					vbytes =
					    sizeof (struct ctf_member_v2) * vlen;
				else
					vbytes =
					    sizeof (struct ctf_lmember_v2) * vlen;
			} else {
				if (size < CTF_V3_LSTRUCT_THRESH)
					vbytes =
					    sizeof (struct ctf_member_v3) * vlen;
				else
					vbytes =
					    sizeof (struct ctf_lmember_v3) * vlen;
			}
			break;
		case CTF_K_ENUM:
			vbytes = sizeof (ctf_enum_t) * vlen;
			break;
		case CTF_K_FORWARD:
			/*
			 * For forward declarations, ctt_type is the CTF_K_*
			 * kind for the tag, so bump that population count too.
			 * If ctt_type is unknown, treat the tag as a struct.
			 */
			if (type == CTF_K_UNKNOWN || type >= CTF_K_MAX)
				pop[CTF_K_STRUCT]++;
			else
				pop[type]++;
			/*FALLTHRU*/
		case CTF_K_UNKNOWN:
			vbytes = 0;
			break;
		case CTF_K_POINTER:
		case CTF_K_TYPEDEF:
		case CTF_K_VOLATILE:
		case CTF_K_CONST:
		case CTF_K_RESTRICT:
			vbytes = 0;
			break;
		default:
			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
			return (EIO);
		}
		tp = (const void *)((uintptr_t)tp + increment + vbytes);
		pop[kind]++;
	}

	/* account for a sentinel value below */
	ctf_typemax++;
	*lc->typlenp = ctf_typemax;

	xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER,
	    M_ZERO | M_WAITOK);

	*lc->typoffp = xp;

	/* type id 0 is used as a sentinel value */
	*xp++ = 0;

	/*
	 * In the second pass, fill in the type offset.
	 */
	for (tp = tbuf; tp < tend; xp++) {
		ssize_t size, increment;
		uint_t kind, vlen;

		size_t vbytes;

		(void) fbt_get_ctt_size(version, tp, &size, &increment);
		fbt_get_ctt_info(version, tp, &kind, &vlen, NULL);

		switch (kind) {
		case CTF_K_INTEGER:
		case CTF_K_FLOAT:
			vbytes = sizeof (uint_t);
			break;
		case CTF_K_ARRAY:
			if (version == CTF_VERSION_2)
				vbytes = sizeof (struct ctf_array_v2);
			else
				vbytes = sizeof (struct ctf_array_v3);
			break;
		case CTF_K_FUNCTION:
			vbytes = roundup2(idwidth * vlen, sizeof(uint32_t));
			break;
		case CTF_K_STRUCT:
		case CTF_K_UNION:
			if (version == CTF_VERSION_2) {
				if (size < CTF_V2_LSTRUCT_THRESH)
					vbytes =
					    sizeof (struct ctf_member_v2) * vlen;
				else
					vbytes =
					    sizeof (struct ctf_lmember_v2) * vlen;
			} else {
				if (size < CTF_V3_LSTRUCT_THRESH)
					vbytes =
					    sizeof (struct ctf_member_v3) * vlen;
				else
					vbytes =
					    sizeof (struct ctf_lmember_v3) * vlen;
			}
			break;
		case CTF_K_ENUM:
			vbytes = sizeof (ctf_enum_t) * vlen;
			break;
		case CTF_K_FORWARD:
		case CTF_K_UNKNOWN:
			vbytes = 0;
			break;
		case CTF_K_POINTER:
		case CTF_K_TYPEDEF:
		case CTF_K_VOLATILE:
		case CTF_K_CONST:
		case CTF_K_RESTRICT:
			vbytes = 0;
			break;
		default:
			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
			return (EIO);
		}
		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
		tp = (const void *)((uintptr_t)tp + increment + vbytes);
	}

	return (0);
}

/*
 * CTF Declaration Stack
 *
 * In order to implement ctf_type_name(), we must convert a type graph back
 * into a C type declaration.  Unfortunately, a type graph represents a storage
 * class ordering of the type whereas a type declaration must obey the C rules
 * for operator precedence, and the two orderings are frequently in conflict.
 * For example, consider these CTF type graphs and their C declarations:
 *
 * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
 * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
 *
 * In each case, parentheses are used to raise operator * to higher lexical
 * precedence, so the string form of the C declaration cannot be constructed by
 * walking the type graph links and forming the string from left to right.
 *
 * The functions in this file build a set of stacks from the type graph nodes
 * corresponding to the C operator precedence levels in the appropriate order.
 * The code in ctf_type_name() can then iterate over the levels and nodes in
 * lexical precedence order and construct the final C declaration string.
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))

typedef enum {
	CTF_PREC_BASE,
	CTF_PREC_POINTER,
	CTF_PREC_ARRAY,
	CTF_PREC_FUNCTION,
	CTF_PREC_MAX
} ctf_decl_prec_t;

typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;

typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* qualifier precision */
	ctf_decl_prec_t cd_ordp;		/* ordered precision */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;

/*
 * Simple doubly-linked list append routine.  This implementation assumes that
 * each list element contains an embedded ctf_list_t as the first member.
 * An additional ctf_list_t is used to store the head (l_next) and tail
 * (l_prev) pointers.  The current head and tail list elements have their
 * previous and next pointers set to NULL, respectively.
 */
static void
ctf_list_append(ctf_list_t *lp, void *new)
{
	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
	ctf_list_t *q = new;		/* q = new list element */

	lp->l_prev = q;
	q->l_prev = p;
	q->l_next = NULL;

	if (p != NULL)
		p->l_next = q;
	else
		lp->l_next = q;
}

/*
 * Prepend the specified existing element to the given ctf_list_t.  The
 * existing pointer should be pointing at a struct with embedded ctf_list_t.
 */
static void
ctf_list_prepend(ctf_list_t *lp, void *new)
{
	ctf_list_t *p = new;		/* p = new list element */
	ctf_list_t *q = lp->l_next;	/* q = head list element */

	lp->l_next = p;
	p->l_prev = NULL;
	p->l_next = q;

	if (q != NULL)
		q->l_prev = p;
	else
		lp->l_prev = p;
}

static void
ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
{
	int i;

	bzero(cd, sizeof (ctf_decl_t));

	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
		cd->cd_order[i] = CTF_PREC_BASE - 1;

	cd->cd_qualp = CTF_PREC_BASE;
	cd->cd_ordp = CTF_PREC_BASE;

	cd->cd_buf = buf;
	cd->cd_ptr = buf;
	cd->cd_end = buf + len;
}

static void
ctf_decl_fini(ctf_decl_t *cd)
{
	ctf_decl_node_t *cdp, *ndp;
	int i;

	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
		    cdp != NULL; cdp = ndp) {
			ndp = ctf_list_next(cdp);
			free(cdp, M_FBT);
		}
	}
}

static const void *
ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
{
	const void *tp;
	uint32_t offset;
	uint32_t *typoff = *lc->typoffp;

	if (type >= *lc->typlenp) {
		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
		return(NULL);
	}

	/* Check if the type isn't cross-referenced. */
	if ((offset = typoff[type]) == 0) {
		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
		return(NULL);
	}

	tp = (const void *) (lc->ctftab + offset + sizeof(ctf_header_t));

	return (tp);
}

static void
fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
{
	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
	const void *tp;
	ssize_t increment;
	uint_t kind;

	bzero(arp, sizeof(*arp));

	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
		return;

	fbt_get_ctt_info(hp->cth_version, tp, &kind, NULL, NULL);
	if (kind != CTF_K_ARRAY)
		return;

	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);

	if (hp->cth_version == CTF_VERSION_2) {
		const struct ctf_array_v2 *ap;

		ap = (const struct ctf_array_v2 *)((uintptr_t)tp + increment);
		arp->ctr_contents = ap->cta_contents;
		arp->ctr_index = ap->cta_index;
		arp->ctr_nelems = ap->cta_nelems;
	} else {
		const struct ctf_array_v3 *ap;

		ap = (const struct ctf_array_v3 *)((uintptr_t)tp + increment);
		arp->ctr_contents = ap->cta_contents;
		arp->ctr_index = ap->cta_index;
		arp->ctr_nelems = ap->cta_nelems;
	}
}

static const char *
ctf_strptr(linker_ctf_t *lc, int name)
{
	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
	const char *strp = "";

	if (name < 0 || name >= hp->cth_strlen)
		return(strp);

	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));

	return (strp);
}

static const char *
ctf_type_rname(linker_ctf_t *lc, const void *v)
{
	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
	uint_t name;

	if (hp->cth_version == CTF_VERSION_2) {
		const struct ctf_type_v2 *ctt = v;

		name = ctt->ctt_name;
	} else {
		const struct ctf_type_v3 *ctt = v;

		name = ctt->ctt_name;
	}

	return (ctf_strptr(lc, name));
}

static void
ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
{
	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
	ctf_decl_node_t *cdp;
	ctf_decl_prec_t prec;
	uint_t kind, n = 1, t;
	int is_qual = 0;

	const void *tp;
	ctf_arinfo_t ar;

	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
		cd->cd_err = ENOENT;
		return;
	}

	fbt_get_ctt_info(hp->cth_version, tp, &kind, NULL, NULL);
	fbt_get_ctt_index(hp->cth_version, tp, NULL, &t, NULL);

	switch (kind) {
	case CTF_K_ARRAY:
		fbt_array_info(lc, type, &ar);
		ctf_decl_push(cd, lc, ar.ctr_contents);
		n = ar.ctr_nelems;
		prec = CTF_PREC_ARRAY;
		break;

	case CTF_K_TYPEDEF:
		if (ctf_type_rname(lc, tp)[0] == '\0') {
			ctf_decl_push(cd, lc, t);
			return;
		}
		prec = CTF_PREC_BASE;
		break;

	case CTF_K_FUNCTION:
		ctf_decl_push(cd, lc, t);
		prec = CTF_PREC_FUNCTION;
		break;

	case CTF_K_POINTER:
		ctf_decl_push(cd, lc, t);
		prec = CTF_PREC_POINTER;
		break;

	case CTF_K_VOLATILE:
	case CTF_K_CONST:
	case CTF_K_RESTRICT:
		ctf_decl_push(cd, lc, t);
		prec = cd->cd_qualp;
		is_qual++;
		break;

	default:
		prec = CTF_PREC_BASE;
	}

	cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK);
	cdp->cd_type = type;
	cdp->cd_kind = kind;
	cdp->cd_n = n;

	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
		cd->cd_order[prec] = cd->cd_ordp++;

	/*
	 * Reset cd_qualp to the highest precedence level that we've seen so
	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
	 */
	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
		cd->cd_qualp = prec;

	/*
	 * C array declarators are ordered inside out so prepend them.  Also by
	 * convention qualifiers of base types precede the type specifier (e.g.
	 * const int vs. int const) even though the two forms are equivalent.
	 */
	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
	else
		ctf_list_append(&cd->cd_nodes[prec], cdp);
}

static void
ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
{
	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
	va_list ap;
	size_t n;

	va_start(ap, format);
	n = vsnprintf(cd->cd_ptr, len, format, ap);
	va_end(ap);

	cd->cd_ptr += MIN(n, len);
	cd->cd_len += n;
}

static ssize_t
fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
{
	ctf_decl_t cd;
	ctf_decl_node_t *cdp;
	ctf_decl_prec_t prec, lp, rp;
	int ptr, arr;
	uint_t k;

	if (lc == NULL && type == CTF_ERR)
		return (-1); /* simplify caller code by permitting CTF_ERR */

	ctf_decl_init(&cd, buf, len);
	ctf_decl_push(&cd, lc, type);

	if (cd.cd_err != 0) {
		ctf_decl_fini(&cd);
		return (-1);
	}

	/*
	 * If the type graph's order conflicts with lexical precedence order
	 * for pointers or arrays, then we need to surround the declarations at
	 * the corresponding lexical precedence with parentheses.  This can
	 * result in either a parenthesized pointer (*) as in int (*)() or
	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
	 */
	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;

	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;

	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */

	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
		    cdp != NULL; cdp = ctf_list_next(cdp)) {

			const void *tp = ctf_lookup_by_id(lc, cdp->cd_type);
			const char *name = ctf_type_rname(lc, tp);

			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
				ctf_decl_sprintf(&cd, " ");

			if (lp == prec) {
				ctf_decl_sprintf(&cd, "(");
				lp = -1;
			}

			switch (cdp->cd_kind) {
			case CTF_K_INTEGER:
			case CTF_K_FLOAT:
			case CTF_K_TYPEDEF:
				ctf_decl_sprintf(&cd, "%s", name);
				break;
			case CTF_K_POINTER:
				ctf_decl_sprintf(&cd, "*");
				break;
			case CTF_K_ARRAY:
				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
				break;
			case CTF_K_FUNCTION:
				ctf_decl_sprintf(&cd, "()");
				break;
			case CTF_K_STRUCT:
			case CTF_K_FORWARD:
				ctf_decl_sprintf(&cd, "struct %s", name);
				break;
			case CTF_K_UNION:
				ctf_decl_sprintf(&cd, "union %s", name);
				break;
			case CTF_K_ENUM:
				ctf_decl_sprintf(&cd, "enum %s", name);
				break;
			case CTF_K_VOLATILE:
				ctf_decl_sprintf(&cd, "volatile");
				break;
			case CTF_K_CONST:
				ctf_decl_sprintf(&cd, "const");
				break;
			case CTF_K_RESTRICT:
				ctf_decl_sprintf(&cd, "restrict");
				break;
			}

			k = cdp->cd_kind;
		}

		if (rp == prec)
			ctf_decl_sprintf(&cd, ")");
	}

	ctf_decl_fini(&cd);
	return (cd.cd_len);
}

static void
fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
{
	const ctf_header_t *hp;
	const char *dp;
	fbt_probe_t *fbt = parg;
	linker_ctf_t lc;
	modctl_t *ctl = fbt->fbtp_ctl;
	size_t idwidth;
	int ndx = desc->dtargd_ndx;
	int symindx = fbt->fbtp_symindx;
	uint32_t *ctfoff;
	uint32_t offset, type;
	uint_t info, n;
	ushort_t kind;

	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
		(void) strcpy(desc->dtargd_native, "int");
		return;
	}

	desc->dtargd_ndx = DTRACE_ARGNONE;

	/* Get a pointer to the CTF data and it's length. */
	if (linker_ctf_get(ctl, &lc) != 0)
		/* No CTF data? Something wrong? *shrug* */
		return;

	/* Check if this module hasn't been initialised yet. */
	if (*lc.ctfoffp == NULL) {
		/*
		 * Initialise the CTF object and function symindx to
		 * byte offset array.
		 */
		if (fbt_ctfoff_init(ctl, &lc) != 0)
			return;

		/* Initialise the CTF type to byte offset array. */
		if (fbt_typoff_init(&lc) != 0)
			return;
	}

	ctfoff = *lc.ctfoffp;

	if (ctfoff == NULL || *lc.typoffp == NULL)
		return;

	/* Check if the symbol index is out of range. */
	if (symindx >= lc.nsym)
		return;

	/* Check if the symbol isn't cross-referenced. */
	if ((offset = ctfoff[symindx]) == 0xffffffff)
		return;

	hp = (const ctf_header_t *) lc.ctftab;
	idwidth = hp->cth_version == CTF_VERSION_2 ? 2 : 4;
	dp = (const char *)(lc.ctftab + offset + sizeof(ctf_header_t));

	info = 0;
	memcpy(&info, dp, idwidth);
	dp += idwidth;
	if (hp->cth_version == CTF_VERSION_2) {
		kind = CTF_V2_INFO_KIND(info);
		n = CTF_V2_INFO_VLEN(info);
	} else {
		kind = CTF_V3_INFO_KIND(info);
		n = CTF_V3_INFO_VLEN(info);
	}

	if (kind == CTF_K_UNKNOWN && n == 0) {
		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
		return;
	}

	if (kind != CTF_K_FUNCTION) {
		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
		return;
	}

	if (fbt->fbtp_roffset != 0) {
		/* Only return type is available for args[1] in return probe. */
		if (ndx > 1)
			return;
		ASSERT(ndx == 1);
	} else {
		/* Check if the requested argument doesn't exist. */
		if (ndx >= n)
			return;

		/* Skip the return type and arguments up to the one requested. */
		dp += idwidth * (ndx + 1);
	}

	type = 0;
	memcpy(&type, dp, idwidth);
	if (fbt_type_name(&lc, type, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
		desc->dtargd_ndx = ndx;
}

static int
fbt_linker_file_cb(linker_file_t lf, void *arg)
{

	fbt_provide_module(arg, lf);

	return (0);
}

static void
fbt_load(void *dummy)
{
	/* Default the probe table size if not specified. */
	if (fbt_probetab_size == 0)
		fbt_probetab_size = FBT_PROBETAB_SIZE;

	/* Choose the hash mask for the probe table. */
	fbt_probetab_mask = fbt_probetab_size - 1;

	/* Allocate memory for the probe table. */
	fbt_probetab =
	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);

	dtrace_doubletrap_func = fbt_doubletrap;
	dtrace_invop_add(fbt_invop);

	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
		return;

	/* Create probes for the kernel and already-loaded modules. */
	linker_file_foreach(fbt_linker_file_cb, NULL);
}

static int
fbt_unload(void)
{
	int error = 0;

	/* De-register the invalid opcode handler. */
	dtrace_invop_remove(fbt_invop);

	dtrace_doubletrap_func = NULL;

	/* De-register this DTrace provider. */
	if ((error = dtrace_unregister(fbt_id)) != 0)
		return (error);

	/* Free the probe table. */
	free(fbt_probetab, M_FBT);
	fbt_probetab = NULL;
	fbt_probetab_mask = 0;

	return (error);
}

static int
fbt_modevent(module_t mod __unused, int type, void *data __unused)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		break;

	case MOD_UNLOAD:
		break;

	case MOD_SHUTDOWN:
		break;

	default:
		error = EOPNOTSUPP;
		break;

	}

	return (error);
}

SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);