/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Routines for preparing tdata trees for conversion into CTF data, and
 * for placing the resulting data into an output file.
 */

#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <libelf.h>
#include <gelf.h>
#include <unistd.h>

#include "ctftools.h"
#include "list.h"
#include "memory.h"
#include "traverse.h"
#include "symbol.h"

/*
 * Search key and result slot used when looking for the iidesc that
 * corresponds to an ELF symbol (see matching_iidesc() and find_iidesc()).
 */
typedef struct iidesc_match {
	int iim_fuzzy;		/* non-zero: allow fuzzy matches */
	iidesc_t *iim_ret;	/* out: matching iidesc, or NULL if none */
	char *iim_name;		/* name of the symbol being looked up */
	char *iim_file;		/* file the symbol belongs to, or NULL */
	uchar_t iim_bind;	/* ELF binding of the symbol (STB_*) */
} iidesc_match_t;

/*
 * hash_iter() callback.  Flags the type referenced by an iidesc as a root
 * and walks it using the traversal state stored in the iiburst.  Function
 * and variable descriptions that were not marked IIDESC_F_USED are left
 * alone.  Returns 1 when the iidesc was processed and 0 when it was
 * skipped.
 */
static int
burst_iitypes(void *data, void *arg)
{
	iiburst_t *burst = arg;
	iidesc_t *desc = data;
	int is_symdesc;

	is_symdesc = (desc->ii_type == II_GFUN || desc->ii_type == II_SFUN ||
	    desc->ii_type == II_GVAR || desc->ii_type == II_SVAR);

	/* Symbol-backed descriptions only count if a symbol claimed them. */
	if (is_symdesc && !(desc->ii_flags & IIDESC_F_USED))
		return (0);

	desc->ii_dtype->t_flags |= TDESC_F_ISROOT;
	(void) iitraverse_td(desc, burst->iib_tdtd);

	return (1);
}

/*
 * Type traversal callback.  Adds the visited type to the iiburst's type
 * list (ordered by tdesc_idcmp) and keeps track of the largest type id
 * encountered.  Always returns 1.
 */
/*ARGSUSED1*/
static int
save_type_by_id(tdesc_t *tdp, tdesc_t **tdpp, void *private)
{
	iiburst_t *burst = private;

	slist_add(&burst->iib_types, tdp, tdesc_idcmp);

	/*
	 * Some types may be suppressed, so nextid in the tdata_t can't be
	 * trusted.  Checking every node is horribly inefficient, but it
	 * yields the true maximum.
	 */
	if (burst->iib_maxtypeid < tdp->t_id)
		burst->iib_maxtypeid = tdp->t_id;

	return (1);
}

/*
 * Callback table handed to tdtrav_init() by iiburst_types().  Every type
 * kind listed below is recorded with save_type_by_id(), except for the
 * first slot, which has no callback, and unresolved typedefs, which are
 * routed to tdtrav_assert().
 */
static tdtrav_cb_f burst_types_cbs[] = {
	NULL,
	save_type_by_id,	/* intrinsic */
	save_type_by_id,	/* pointer */
	save_type_by_id,	/* array */
	save_type_by_id,	/* function */
	save_type_by_id,	/* struct */
	save_type_by_id,	/* union */
	save_type_by_id,	/* enum */
	save_type_by_id,	/* forward */
	save_type_by_id,	/* typedef */
	tdtrav_assert,		/* typedef_unres */
	save_type_by_id,	/* volatile */
	save_type_by_id,	/* const */
	save_type_by_id		/* restrict */
};


/*
 * Allocate an iiburst for td whose function and object tables each have
 * room for max iidesc pointers.  Both tables start out with a count of
 * zero.
 */
static iiburst_t *
iiburst_new(tdata_t *td, int max)
{
	size_t tblsz = sizeof (iidesc_t *) * max;
	iiburst_t *burst;

	burst = xcalloc(sizeof (iiburst_t));
	burst->iib_td = td;

	burst->iib_funcs = xcalloc(tblsz);
	burst->iib_nfuncs = 0;

	burst->iib_objts = xcalloc(tblsz);
	burst->iib_nobjts = 0;

	return (burst);
}

/*
 * Walk every iidesc in the iiburst's tdata with burst_iitypes(), which
 * marks root types and, through burst_types_cbs, records each reachable
 * type in the iiburst.
 *
 * The traversal state lives on this function's stack and is published
 * through iiburst->iib_tdtd only for the duration of the walk.
 */
static void
iiburst_types(iiburst_t *iiburst)
{
	tdtrav_data_t tdtd;

	tdtrav_init(&tdtd, &iiburst->iib_td->td_curvgen, NULL, burst_types_cbs,
	    NULL, (void *)iiburst);

	iiburst->iib_tdtd = &tdtd;

	(void) hash_iter(iiburst->iib_td->td_iihash, burst_iitypes, iiburst);

	/*
	 * tdtd goes out of scope when we return; don't leave a dangling
	 * pointer to it in a structure that outlives this frame.
	 */
	iiburst->iib_tdtd = NULL;
}

/*
 * Dispose of an iiburst: its type list, its function and object tables,
 * and the iiburst structure itself.  The list is freed with no per-item
 * callback.
 */
static void
iiburst_free(iiburst_t *iiburst)
{
	list_free(iiburst->iib_types, NULL, NULL);

	free(iiburst->iib_objts);
	free(iiburst->iib_funcs);

	free(iiburst);
}

/*
 * Decide whether an iidesc describes the ELF symbol summarized in match.
 * Whenever a usable iidesc is found it is stored in match->iim_ret.
 *
 * A global description (II_GFUN, II_GVAR) is an exact match for a symbol
 * with STB_GLOBAL binding.  When fuzzy matching has been requested, a
 * non-global symbol of the same name is accepted as well: this covers a
 * local symbol matching a global type description (common when a mapfile
 * is used for a DSO) and a weak symbol that was resolved and matched to
 * a global type description.  Only the first fuzzy match is remembered,
 * and the search is allowed to continue in the hope of a strong match.
 *
 * A static description (II_SFUN, II_SVAR) matches only an STB_LOCAL
 * symbol whose file is known and equal to the description's owner.
 *
 * Returns -1 for an exact match and 0 in every other case, including a
 * fuzzy match.
 */
static int
matching_iidesc(iidesc_t *iidesc, iidesc_match_t *match)
{
	int global_desc, static_desc;

	if (!streq(iidesc->ii_name, match->iim_name))
		return (0);

	global_desc = (iidesc->ii_type == II_GFUN ||
	    iidesc->ii_type == II_GVAR);
	static_desc = (iidesc->ii_type == II_SFUN ||
	    iidesc->ii_type == II_SVAR);

	if (global_desc) {
		if (match->iim_bind != STB_GLOBAL) {
			/* fuzzy: keep the first one, keep looking */
			if (match->iim_fuzzy && match->iim_ret == NULL)
				match->iim_ret = iidesc;
			return (0);
		}

		match->iim_ret = iidesc;
		return (-1);
	}

	if (static_desc && match->iim_bind == STB_LOCAL &&
	    match->iim_file != NULL &&
	    streq(iidesc->ii_owner, match->iim_file)) {
		match->iim_ret = iidesc;
		return (-1);
	}

	return (0);
}

/*
 * Search hash for the iidesc matching the symbol described by match.  A
 * scratch iidesc carrying only the symbol name is used as the lookup key,
 * and matching_iidesc() is applied to the entries hash_match() visits.
 * Returns whatever matching_iidesc() left in match->iim_ret, which is
 * NULL if nothing matched.
 */
static iidesc_t *
find_iidesc(hash_t *hash, iidesc_match_t *match)
{
	iidesc_t key;

	bzero(&key, sizeof (key));
	key.ii_name = match->iim_name;

	match->iim_ret = NULL;
	(void) hash_match(hash, &key, (int (*)())matching_iidesc, match);

	return (match->iim_ret);
}

/*
 * If we have a weak symbol, attempt to find the strong symbol it will
 * resolve to.  Note: the code where this actually happens is in
 * sym_process() in cmd/sgs/libld/common/syms.c
 *
 * Finding the matching symbol is unfortunately not trivial.  For a
 * symbol to be a candidate, it must:
 *
 * - have the same type (function, object)
 * - have the same value (address)
 * - have the same size
 * - not be another weak symbol
 * - belong to the same section (checked via section index)
 *
 * If such a candidate is global, then we assume we've found it.  The
 * linker generates the symbol table such that the curfile might be
 * incorrect; this is OK for global symbols, since find_iidesc() doesn't
 * need to check for the source file for the symbol.
 *
 * We might have found a strong local symbol, where the curfile is
 * accurate and matches that of the weak symbol.  We assume this is a
 * reasonable match.
 *
 * If we've got a local symbol with a non-matching curfile, there are
 * two possibilities.  Either this is a completely different symbol, or
 * it's a once-global symbol that was scoped to local via a mapfile.  In
 * the latter case, curfile is likely inaccurate since the linker does
 * not preserve the needed curfile in the order of the symbol table (see
 * the comments about locally scoped symbols in libld's update_osym()).
 * As we can't tell this case from the former one, we use this symbol
 * iff no other matching symbol is found.
 *
 * What we really need here is a SUNW section containing weak<->strong
 * mappings that we can consume.
 */
static int
check_for_weak(GElf_Sym *weak, char const *weakfile,
    Elf_Data *data, int nent, Elf_Data *strdata,
    GElf_Sym *retsym, char **curfilep)
{
	uchar_t weaktype;
	char *curfile = NULL;
	char *fallback_file;
	GElf_Sym fallback_sym;
	int have_fallback = 0;
	int idx;

	/* Only weak symbols need resolving. */
	if (GELF_ST_BIND(weak->st_info) != STB_WEAK)
		return (0);

	weaktype = GELF_ST_TYPE(weak->st_info);

	for (idx = 0; idx < nent; idx++) {
		GElf_Sym cur;
		uchar_t curtype, curbind;

		if (gelf_getsym(data, idx, &cur) == NULL)
			continue;

		curtype = GELF_ST_TYPE(cur.st_info);
		curbind = GELF_ST_BIND(cur.st_info);

		/* Remember the most recent STT_FILE symbol's name. */
		if (curtype == STT_FILE)
			curfile = (char *)strdata->d_buf + cur.st_name;

		/*
		 * A candidate has the same type, value, size and section
		 * index as the weak symbol, and is not itself weak.
		 */
		if (curtype != weaktype ||
		    cur.st_value != weak->st_value ||
		    cur.st_size != weak->st_size ||
		    curbind == STB_WEAK ||
		    cur.st_shndx != weak->st_shndx)
			continue;

		/*
		 * Take a non-local candidate, or a local one from the
		 * weak symbol's own file, straight away.
		 */
		if (curbind != STB_LOCAL || (curfile != NULL &&
		    weakfile != NULL && strcmp(curfile, weakfile) == 0)) {
			*curfilep = curfile;
			*retsym = cur;
			return (1);
		}

		/*
		 * A local candidate whose file doesn't match is only used
		 * if nothing better turns up; the last one seen wins.
		 */
		have_fallback = 1;
		fallback_file = curfile;
		fallback_sym = cur;
	}

	if (!have_fallback)
		return (0);

	*curfilep = fallback_file;
	*retsym = fallback_sym;
	return (1);
}

/*
 * Build the type description for a weak symbol out of the one found for
 * the underlying strong symbol.  The strong description is duplicated
 * under the weak symbol's name and file, typed as a global variable or
 * global function according to the ELF type of sym (any other ELF type
 * leaves the duplicate's type as iidesc_dup_rename() set it), and added
 * to td's iidesc hash so that it is handled along with the others later.
 * Returns the new description.
 */
static iidesc_t *
copy_from_strong(tdata_t *td, GElf_Sym *sym, iidesc_t *strongdesc,
    const char *weakname, const char *weakfile)
{
	uchar_t symtype = GELF_ST_TYPE(sym->st_info);
	iidesc_t *weakdesc;

	weakdesc = iidesc_dup_rename(strongdesc, weakname, weakfile);

	if (symtype == STT_OBJECT)
		weakdesc->ii_type = II_GVAR;
	else if (symtype == STT_FUNC)
		weakdesc->ii_type = II_GFUN;

	hash_add(td->td_iihash, weakdesc);

	return (weakdesc);
}

/*
 * Process the symbol table of the output file, associating each symbol
 * with a type description if possible, and sorting them into functions
 * and data, maintaining symbol table order.
 *
 * elf and file identify the ELF object being read (file is used only in
 * messages).  td supplies the iidesc hash to search.  fuzzymatch is
 * passed through to the matching code (see matching_iidesc()).  If dynsym
 * is non-zero the .dynsym section is used instead of .symtab.
 *
 * Every STT_OBJECT or STT_FUNC symbol that is not rejected by
 * ignore_symbol() consumes one slot in the object or function table of
 * the returned iiburst.  Slots for symbols without a type description are
 * not written and so keep the value they were given by iiburst_new().
 *
 * Any failure to read the symbol table terminates the program.
 */
static iiburst_t *
sort_iidescs(Elf *elf, const char *file, tdata_t *td, int fuzzymatch,
    int dynsym)
{
	iiburst_t *iiburst;
	Elf_Scn *scn;
	GElf_Shdr shdr;
	Elf_Data *data, *strdata;
	int i, stidx;
	int nent;
	iidesc_match_t match;

	match.iim_fuzzy = fuzzymatch;
	match.iim_file = NULL;

	if ((stidx = findelfsecidx(elf, dynsym ? ".dynsym" : ".symtab")) < 0)
		terminate("%s: Can't open symbol table\n", file);
	if ((scn = elf_getscn(elf, stidx)) == NULL ||
	    gelf_getshdr(scn, &shdr) == NULL)
		elfterminate(file, "Can't read symbol table section header");
	data = elf_getdata(scn, NULL);

	/* A zero entry size would make the division below trap. */
	if (shdr.sh_entsize == 0)
		terminate("%s: Symbol table has zero entry size\n", file);
	nent = shdr.sh_size / shdr.sh_entsize;

	/* The symbol table's sh_link names its string table. */
	scn = elf_getscn(elf, shdr.sh_link);
	strdata = elf_getdata(scn, NULL);

	/* Both buffers are dereferenced for every symbol below. */
	if (nent > 0 && (data == NULL || strdata == NULL))
		elfterminate(file, "Can't read symbol table data");

	iiburst = iiburst_new(td, nent);

	for (i = 0; i < nent; i++) {
		GElf_Sym sym;
		iidesc_t **tolist;
		GElf_Sym ssym;
		iidesc_match_t smatch;
		int *curr;
		iidesc_t *iidesc;

		if (gelf_getsym(data, i, &sym) == NULL)
			elfterminate(file, "Couldn't read symbol %d", i);

		match.iim_name = (char *)strdata->d_buf + sym.st_name;
		match.iim_bind = GELF_ST_BIND(sym.st_info);

		switch (GELF_ST_TYPE(sym.st_info)) {
		case STT_FILE:
			/* Symbols that follow belong to this file. */
			match.iim_file = match.iim_name;
			continue;
		case STT_OBJECT:
			tolist = iiburst->iib_objts;
			curr = &iiburst->iib_nobjts;
			break;
		case STT_FUNC:
			tolist = iiburst->iib_funcs;
			curr = &iiburst->iib_nfuncs;
			break;
		default:
			continue;
		}

		if (ignore_symbol(&sym, match.iim_name))
			continue;

		iidesc = find_iidesc(td->td_iihash, &match);

		if (iidesc != NULL) {
			tolist[*curr] = iidesc;
			iidesc->ii_flags |= IIDESC_F_USED;
			(*curr)++;
			continue;
		}

		/*
		 * No direct match.  If this is a weak symbol, try the
		 * strong symbol it resolves to; otherwise the slot is
		 * consumed without being filled in.
		 */
		if (!check_for_weak(&sym, match.iim_file, data, nent, strdata,
		    &ssym, &smatch.iim_file)) {
			(*curr)++;
			continue;
		}

		smatch.iim_fuzzy = fuzzymatch;
		smatch.iim_name = (char *)strdata->d_buf + ssym.st_name;
		smatch.iim_bind = GELF_ST_BIND(ssym.st_info);

		debug(3, "Weak symbol %s resolved to %s\n", match.iim_name,
		    smatch.iim_name);

		iidesc = find_iidesc(td->td_iihash, &smatch);

		if (iidesc != NULL) {
			tolist[*curr] = copy_from_strong(td, &sym,
			    iidesc, match.iim_name, match.iim_file);
			tolist[*curr]->ii_flags |= IIDESC_F_USED;
		}

		(*curr)++;
	}

	/*
	 * Stabs are generated for every function declared in a given C source
	 * file.  When converting an object file, we may encounter a stab that
	 * has no symbol table entry because the optimizer has decided to omit
	 * that item (for example, an unreferenced static function).  We may
	 * see iidescs that do not have an associated symtab entry, and so
	 * we do not write records for those functions into the CTF data.
	 * All others get marked as a root by this function.
	 */
	iiburst_types(iiburst);

	/*
	 * By not adding some of the functions and/or objects, we may have
	 * caused some types that were referenced solely by those
	 * functions/objects to be suppressed.  This could cause a label,
	 * generated prior to the evisceration, to be incorrect.  Find the
	 * highest type index, and change the label indices to be no higher
	 * than this value.
	 */
	tdata_label_newmax(td, iiburst->iib_maxtypeid);

	return (iiburst);
}

/*
 * Write a copy of the ELF object src to dst, replacing any existing CTF
 * section with one holding ctfdata (ctfsize bytes).
 *
 * The following source sections are not copied:
 *   - a section named CTF_ELF_SCN_NAME
 *   - unless CTF_KEEP_STABS is set in flags, sections whose names begin
 *     with .stab, .debug, .rel.debug or .rela.debug
 *   - if CTF_USE_DYNSYM is set in flags, SHT_SYMTAB sections
 *
 * Section indices stored in section headers (sh_link, and sh_info for
 * relocation sections) and in the symbols of the symbol table in use are
 * translated to the new numbering.  The CTF section name is appended to
 * the section header string table, and the new CTF section is linked to
 * the symbol table (.dynsym or .symtab, according to flags).
 *
 * If the source has program headers they are copied verbatim and the
 * file layout is managed explicitly (ELF_F_LAYOUT): once a section has
 * been dropped or resized, later sections are packed to avoid holes.
 *
 * srcname and dstname are used only in messages.  ctfdata is referenced
 * by the new section and is not freed here.  Failures terminate the
 * program.
 */
static void
write_file(Elf *src, const char *srcname, Elf *dst, const char *dstname,
    caddr_t ctfdata, size_t ctfsize, int flags)
{
	GElf_Ehdr sehdr, dehdr;
	Elf_Scn *sscn, *dscn;
	Elf_Data *sdata, *ddata;
	GElf_Shdr shdr;
	GElf_Word symtab_type;
	int symtab_idx = -1;
	off_t new_offset = 0;
	off_t ctfnameoff = 0;
	int dynsym = (flags & CTF_USE_DYNSYM);
	int keep_stabs = (flags & CTF_KEEP_STABS);
	int *secxlate;
	int srcidx, dstidx;
	int changing = 0;
	int pad;
	int i;

	if (gelf_newehdr(dst, gelf_getclass(src)) == NULL)
		elfterminate(dstname, "Cannot copy ehdr to temp file");
	if (gelf_getehdr(src, &sehdr) == NULL)
		elfterminate(srcname, "Cannot read ehdr");
	memcpy(&dehdr, &sehdr, sizeof (GElf_Ehdr));
	gelf_update_ehdr(dst, &dehdr);

	symtab_type = dynsym ? SHT_DYNSYM : SHT_SYMTAB;

	/*
	 * Neither the existing stab sections nor the SUNW_ctf sections (new or
	 * existing) are SHF_ALLOC'd, so they won't be in areas referenced by
	 * program headers.  As such, we can just blindly copy the program
	 * headers from the existing file to the new file.
	 */
	if (sehdr.e_phnum != 0) {
		(void) elf_flagelf(dst, ELF_C_SET, ELF_F_LAYOUT);
		if (gelf_newphdr(dst, sehdr.e_phnum) == NULL)
			elfterminate(dstname, "Cannot make phdrs in temp file");

		for (i = 0; i < sehdr.e_phnum; i++) {
			GElf_Phdr phdr;

			if (gelf_getphdr(src, i, &phdr) == NULL)
				elfterminate(srcname, "Cannot read phdr %d", i);
			gelf_update_phdr(dst, i, &phdr);
		}
	}

	/*
	 * Pass 1: build the source-to-destination section index map.
	 * Sections that will not be copied map to -1.
	 */
	secxlate = xmalloc(sizeof (int) * sehdr.e_shnum);
	for (srcidx = dstidx = 0; srcidx < sehdr.e_shnum; srcidx++) {
		Elf_Scn *scn = elf_getscn(src, srcidx);
		GElf_Shdr shdr;
		char *sname;

		if (gelf_getshdr(scn, &shdr) == NULL) {
			elfterminate(srcname, "Cannot read shdr for section %d",
			    srcidx);
		}
		sname = elf_strptr(src, sehdr.e_shstrndx, shdr.sh_name);
		if (sname == NULL) {
			elfterminate(srcname, "Can't find string at %u",
			    shdr.sh_name);
		}

		if (strcmp(sname, CTF_ELF_SCN_NAME) == 0) {
			secxlate[srcidx] = -1;
		} else if (!keep_stabs &&
		    (strncmp(sname, ".stab", 5) == 0 ||
		    strncmp(sname, ".debug", 6) == 0 ||
		    strncmp(sname, ".rel.debug", 10) == 0 ||
		    strncmp(sname, ".rela.debug", 11) == 0)) {
			secxlate[srcidx] = -1;
		} else if (dynsym && shdr.sh_type == SHT_SYMTAB) {
			/*
			 * If we're building CTF against the dynsym,
			 * we'll rip out the symtab so debuggers aren't
			 * confused.
			 */
			secxlate[srcidx] = -1;
		} else {
			secxlate[srcidx] = dstidx++;
		}
	}

	/* Explicit layout, if needed, starts out from e_phoff. */
	new_offset = (off_t)dehdr.e_phoff;

	/* Pass 2: copy the surviving sections (section 0 is implicit). */
	for (srcidx = 1; srcidx < sehdr.e_shnum; srcidx++) {
		char *sname;

		sscn = elf_getscn(src, srcidx);
		if (gelf_getshdr(sscn, &shdr) == NULL) {
			elfterminate(srcname, "Cannot read shdr for section %d",
			    srcidx);
		}

		if (secxlate[srcidx] == -1) {
			changing = 1;
			continue;
		}

		dscn = elf_newscn(dst);

		/*
		 * If this file has program headers, we need to explicitly lay
		 * out sections.  If none of the sections prior to this one have
		 * been removed, then we can just use the existing location.  If
		 * one or more sections have been changed, then we need to
		 * adjust this one to avoid holes.
		 */
		if (changing && sehdr.e_phnum != 0) {
			/*
			 * sh_addralign values of 0 and 1 both mean "no
			 * alignment constraint"; 0 must not be used as a
			 * divisor.
			 */
			if (shdr.sh_addralign > 1) {
				pad = new_offset % shdr.sh_addralign;

				if (pad)
					new_offset += shdr.sh_addralign - pad;
			}
			shdr.sh_offset = new_offset;
		}

		/*
		 * Translate section references to the new numbering.  A
		 * reference to a removed section translates to -1.  Indices
		 * outside the source section table are left as they are
		 * rather than being used to index secxlate.
		 */
		if (shdr.sh_link < sehdr.e_shnum)
			shdr.sh_link = secxlate[shdr.sh_link];

		if ((shdr.sh_type == SHT_REL || shdr.sh_type == SHT_RELA) &&
		    shdr.sh_info < sehdr.e_shnum)
			shdr.sh_info = secxlate[shdr.sh_info];

		sname = elf_strptr(src, sehdr.e_shstrndx, shdr.sh_name);
		if (sname == NULL) {
			elfterminate(srcname, "Can't find string at %u",
			    shdr.sh_name);
		}
		if ((sdata = elf_getdata(sscn, NULL)) == NULL)
			elfterminate(srcname, "Cannot get sect %s data", sname);
		if ((ddata = elf_newdata(dscn)) == NULL)
			elfterminate(dstname, "Can't make sect %s data", sname);
		bcopy(sdata, ddata, sizeof (Elf_Data));

		if (srcidx == sehdr.e_shstrndx) {
			/*
			 * Append the CTF section's name to the section header
			 * string table and remember where it landed.
			 */
			size_t seclen = strlen(CTF_ELF_SCN_NAME);

			ddata->d_buf = xmalloc(ddata->d_size + shdr.sh_size +
			    seclen + 1);
			bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);
			strcpy((caddr_t)ddata->d_buf + shdr.sh_size,
			    CTF_ELF_SCN_NAME);
			ctfnameoff = (off_t)shdr.sh_size;
			shdr.sh_size += seclen + 1;
			ddata->d_size += seclen + 1;

			/* This section grew, so later ones have to move. */
			if (sehdr.e_phnum != 0)
				changing = 1;
		}

		if (shdr.sh_type == symtab_type && shdr.sh_entsize != 0) {
			int nsym = shdr.sh_size / shdr.sh_entsize;

			symtab_idx = secxlate[srcidx];

			/* Work on a private copy of the symbols. */
			ddata->d_buf = xmalloc(shdr.sh_size);
			bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);

			for (i = 0; i < nsym; i++) {
				GElf_Sym sym;
				int newscn;

				if (gelf_getsym(ddata, i, &sym) == NULL)
					continue;

				/*
				 * Reserved indices are not section numbers,
				 * and anything past the section table can't
				 * be translated.
				 */
				if (sym.st_shndx >= SHN_LORESERVE ||
				    sym.st_shndx >= sehdr.e_shnum)
					continue;

				/*
				 * Symbols from a removed section are pointed
				 * at section 1.
				 */
				if ((newscn = secxlate[sym.st_shndx]) !=
				    sym.st_shndx) {
					sym.st_shndx =
					    (newscn == -1 ? 1 : newscn);

					gelf_update_sym(ddata, i, &sym);
				}
			}
		}

		if (gelf_update_shdr(dscn, &shdr) == NULL)
			elfterminate(dstname, "Cannot update sect %s", sname);

		new_offset = (off_t)shdr.sh_offset;
		if (shdr.sh_type != SHT_NOBITS)
			new_offset += shdr.sh_size;
	}

	if (symtab_idx == -1) {
		terminate("Cannot find %s section\n",
		    dynsym ? "SHT_DYNSYM" : "SHT_SYMTAB");
	}

	/* Add the ctf section */
	if ((dscn = elf_newscn(dst)) == NULL ||
	    gelf_getshdr(dscn, &shdr) == NULL)
		elfterminate(dstname, "Cannot create CTF section");
	shdr.sh_name = ctfnameoff;
	shdr.sh_type = SHT_PROGBITS;
	shdr.sh_size = ctfsize;
	shdr.sh_link = symtab_idx;
	shdr.sh_addralign = 4;
	if (changing && sehdr.e_phnum != 0) {
		pad = new_offset % shdr.sh_addralign;

		if (pad)
			new_offset += shdr.sh_addralign - pad;

		shdr.sh_offset = new_offset;
		new_offset += shdr.sh_size;
	}

	if ((ddata = elf_newdata(dscn)) == NULL)
		elfterminate(dstname, "Cannot create CTF section data");
	ddata->d_buf = ctfdata;
	ddata->d_size = ctfsize;
	ddata->d_align = shdr.sh_addralign;

	if (gelf_update_shdr(dscn, &shdr) == NULL)
		elfterminate(dstname, "Cannot update CTF section");

	/* update the section header location */
	if (sehdr.e_phnum != 0) {
		size_t align = gelf_fsize(dst, ELF_T_ADDR, 1, EV_CURRENT);
		size_t r = new_offset % align;

		if (r)
			new_offset += align - r;

		dehdr.e_shoff = new_offset;
	}

	/* commit to disk */
	dehdr.e_shstrndx = secxlate[sehdr.e_shstrndx];
	gelf_update_ehdr(dst, &dehdr);
	if (elf_update(dst, ELF_C_WRITE) < 0)
		elfterminate(dstname, "Cannot finalize temp file");

	free(secxlate);
}

/*
 * Produce the CTF data for td as it applies to the symbol table of elf.
 * The iidescs are first sorted against the symbol table (honoring the
 * CTF_FUZZY_MATCH and CTF_USE_DYNSYM bits of flags) and the result is
 * given to ctf_gen() together with lenp and the CTF_COMPRESS bit.  The
 * intermediate iiburst is released before returning; the buffer obtained
 * from ctf_gen() is returned to the caller.
 */
static caddr_t
make_ctf_data(tdata_t *td, Elf *elf, const char *file, size_t *lenp, int flags)
{
	int fuzzy = flags & CTF_FUZZY_MATCH;
	int use_dynsym = flags & CTF_USE_DYNSYM;
	iiburst_t *burst;
	caddr_t ctfbuf;

	burst = sort_iidescs(elf, file, td, fuzzy, use_dynsym);
	ctfbuf = ctf_gen(burst, lenp, flags & CTF_COMPRESS);
	iiburst_free(burst);

	return (ctfbuf);
}

/*
 * Generate CTF data for td against the ELF file curname and write a copy
 * of that file, with the CTF data added, to newname.
 *
 * curname is opened read-only; newname is created (or truncated) with
 * the same mode bits as curname.  flags is passed on to make_ctf_data()
 * and write_file().  Any failure terminates the program, naming the file
 * that the failing operation was applied to.
 */
void
write_ctf(tdata_t *td, const char *curname, const char *newname, int flags)
{
	struct stat st;
	Elf *elf = NULL;
	Elf *telf = NULL;
	caddr_t data;
	size_t len;
	int fd = -1;
	int tfd = -1;

	(void) elf_version(EV_CURRENT);
	if ((fd = open(curname, O_RDONLY)) < 0 || fstat(fd, &st) < 0)
		terminate("%s: Cannot open for re-reading", curname);
	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
		elfterminate(curname, "Cannot re-read");

	if ((tfd = open(newname, O_WRONLY | O_CREAT | O_TRUNC, st.st_mode)) < 0)
		terminate("Cannot open temp file %s for writing", newname);
	/* This descriptor belongs to newname, so report failures against it. */
	if ((telf = elf_begin(tfd, ELF_C_WRITE, NULL)) == NULL)
		elfterminate(newname, "Cannot write");

	data = make_ctf_data(td, elf, curname, &len, flags);
	write_file(elf, curname, telf, newname, data, len, flags);
	free(data);

	elf_end(telf);
	elf_end(elf);
	(void) close(fd);
	(void) close(tfd);
}