/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include	"gprof.h"
#include	<stdlib.h>
#include	<sys/file.h>
#include	<fcntl.h>
#include	<unistd.h>
#include	<string.h>
#include	<sysexits.h>
#include	<libelf.h>
#include 	"gelf.h"

#ifdef DEBUG
static void	debug_dup_del(nltype *, nltype *);

/*
 * Debug tracing helpers.  Each one acts only when the ELFDEBUG bit is
 * set in `debug'.  The bodies are wrapped in do { } while (0) so that an
 * invocation followed by a semicolon is exactly one statement and cannot
 * capture a following `else' or leave a stray empty statement behind.
 */
#define	DPRINTF(msg, file)	do { \
					if (debug & ELFDEBUG) \
						(void) printf(msg, file); \
				} while (0)

#define	PRINTF(msg)		do { \
					if (debug & ELFDEBUG) \
						(void) printf(msg); \
				} while (0)

#define	DEBUG_DUP_DEL(keeper, louser)	do { \
					if (debug & ELFDEBUG) \
						debug_dup_del(keeper, louser); \
				} while (0)

#else
/* Non-DEBUG builds: the tracing helpers expand to nothing. */
#define	DPRINTF(msg, file)
#define	PRINTF(msg)
#define	DEBUG_DUP_DEL(keeper, louser)
#endif

/*
 * Virtual address (textbegin) and file size (textsize) of the text
 * segment chosen by get_textseg().
 */
size_t	textbegin, textsize;

/* Forward declarations for static helpers defined later in this file */

static void	process(char *filename, int fd);
static void	get_symtab(Elf *elf, mod_info_t *module);
static void	get_textseg(Elf *elf, int fd);
static void	save_aout_info(char *);

/*
 * Report an unrecoverable error on stderr, appending the text that
 * elf_errmsg(-1) supplies, and terminate with exit status EX_SOFTWARE.
 * This function does not return.
 */
static void
fatal_error(char *error)
{
	const char	*elf_reason = elf_errmsg(-1);

	(void) fprintf(stderr, "Fatal ELF error: %s (%s)\n",
	    error, elf_reason);
	exit(EX_SOFTWARE);
}

/*
 * Determine whether the file `name' is an ELF shared object, i.e.
 * whether its ELF header has e_type == ET_DYN.
 *
 * Returns TRUE for a shared object and FALSE otherwise.  The process
 * exits if the file cannot be opened (EX_NOINPUT) or cannot be read as
 * an ELF file (via fatal_error()).
 */
bool
is_shared_obj(char *name)
{
	int		fd;
	Elf		*elf;
	GElf_Ehdr	ehdr;
	bool		shared;

	fd = open(name, O_RDONLY);
	if (fd == -1) {
		(void) fprintf(stderr, "%s: can't open `%s'\n", whoami, name);
		exit(EX_NOINPUT);
	}

	if (elf_version(EV_CURRENT) == EV_NONE)
		fatal_error("libelf is out of date");

	elf = elf_begin(fd, ELF_C_READ, NULL);
	if (elf == NULL)
		fatal_error("can't read as ELF file");

	if (gelf_getehdr(elf, &ehdr) == NULL)
		fatal_error("can't read ehdr");

	/* ehdr is a local copy, so it stays valid after elf_end() */
	shared = (ehdr.e_type == ET_DYN) ? TRUE : FALSE;

	(void) elf_end(elf);
	(void) close(fd);

	return (shared);
}

/*
 * Record the identity of the a.out file (device, inode, modification
 * time and size, as reported by stat()) in the global aout_info.
 * Exits with EX_NOINPUT if the file cannot be stat'ed.
 */
static void
save_aout_info(char *aoutname)
{
	extern fl_info_t	aout_info;
	struct stat		sb;
	fl_info_t		*info = &aout_info;

	if (stat(aoutname, &sb) == -1) {
		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
		    whoami, aoutname);
		exit(EX_NOINPUT);
	}

	info->dev = sb.st_dev;
	info->ino = sb.st_ino;
	info->mtime = sb.st_mtime;
	info->size = sb.st_size;
}

/*
 * Open the a.out file `aoutname', process its ELF contents with
 * process(), save its file identity with save_aout_info(), and close
 * the descriptor again.  Exits with EX_NOINPUT if the file cannot be
 * opened.
 */
void
getnfile(char *aoutname)
{
	int	aout_fd;

	DPRINTF(" Attempting to open %s  \n", aoutname);

	aout_fd = open(aoutname, O_RDONLY);
	if (aout_fd == -1) {
		(void) fprintf(stderr, "%s: can't open `%s'\n",
		    whoami, aoutname);
		exit(EX_NOINPUT);
	}

	process(aoutname, aout_fd);
	save_aout_info(aoutname);

	(void) close(aout_fd);
}

/*
 * Return the lowest p_vaddr among the PT_LOAD program headers that are
 * not writable (PF_W clear).  Returns 0 if no such segment exists.
 * Program headers that cannot be read are skipped.
 */
static GElf_Addr
get_txtorigin(Elf *elf)
{
	GElf_Ehdr	ehdr;
	GElf_Phdr	phdr;
	GElf_Half	seg;
	GElf_Addr	lowest = 0;
	bool		have_origin = FALSE;

	if (gelf_getehdr(elf, &ehdr) == NULL)
		fatal_error("can't read ehdr");

	for (seg = 0; seg < ehdr.e_phnum; seg++) {
		if (gelf_getphdr(elf, seg, &phdr) == NULL)
			continue;

		/* Only read-only loadable segments are candidates */
		if (phdr.p_type != PT_LOAD || (phdr.p_flags & PF_W))
			continue;

		if (!have_origin || phdr.p_vaddr < lowest) {
			lowest = phdr.p_vaddr;
			have_origin = TRUE;
		}
	}

	return (lowest);
}

/*
 * Build the name list for an additional module (module->name): open the
 * file, record its text origin, read its symbol table via get_symtab()
 * and mark the module active.  module->next is reset to NULL.
 *
 * Exits with EX_NOINPUT if the file cannot be opened, or through
 * fatal_error() if it cannot be read as ELF.
 */
void
process_namelist(mod_info_t *module)
{
	int		fd;
	Elf		*elf;

	if ((fd = open(module->name, O_RDONLY)) == -1) {
		(void) fprintf(stderr, "%s: can't read %s\n",
		    whoami, module->name);
		(void) fprintf(stderr, "Exiting due to error(s)...\n");
		exit(EX_NOINPUT);
	}

	/*
	 * libelf's version already verified in processing a.out,
	 * so directly do elf_begin()
	 */
	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
		fatal_error("can't read as ELF file");

	module->next = NULL;
	module->txt_origin = get_txtorigin(elf);
	get_symtab(elf, module);
	module->active = TRUE;

	/*
	 * The Elf descriptor is deliberately not released with elf_end():
	 * the name list built by get_symtab() keeps the string pointers
	 * handed back by elf_strptr().  The file descriptor, however, is
	 * no longer needed once the symbols have been read, so close it
	 * here, as getnfile() does for the a.out, rather than leaking one
	 * descriptor per module.
	 */
	(void) close(fd);
}

/*
 * Process the a.out file `filename', already open on `fd':
 *
 *  - verify the libelf version and open an ELF descriptor on fd;
 *  - set Bflag when the file is ELFCLASS64;
 *  - initialize the head of the module list (`modules') with a copy of
 *    the file name, its text origin and its symbols (via get_symtab());
 *  - when cflag is set, load the text segment with get_textseg().
 *
 * Any failure is fatal: the process exits through fatal_error(), or
 * with EX_UNAVAILABLE if the file name cannot be duplicated.
 */
static void
process(char *filename, int fd)
{
	Elf			*elf;
	size_t			namelen;
	extern bool		cflag;
	extern bool		Bflag;

	if (elf_version(EV_CURRENT) == EV_NONE)
		fatal_error("libelf is out of date");

	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
		fatal_error("can't read as ELF file");

	if (gelf_getclass(elf) == ELFCLASS64)
		Bflag = TRUE;

	/*
	 * Initialize active modules list. Note that we set the end
	 * address while reading the symbol table, in get_symtab
	 */
	modules.id = 1;
	modules.next = NULL;
	modules.txt_origin = get_txtorigin(elf);
	modules.load_base = modules.txt_origin;

	namelen = strlen(filename) + 1;
	if ((modules.name = malloc(namelen)) == NULL) {
		/*
		 * namelen is a size_t; print it through an explicit
		 * unsigned long cast so the conversion specifier matches
		 * the argument on both ILP32 and LP64.
		 */
		(void) fprintf(stderr, "%s: can't malloc %lu bytes\n",
		    whoami, (unsigned long)namelen);
		exit(EX_UNAVAILABLE);
	}
	(void) strcpy(modules.name, filename);

	get_symtab(elf, &modules);

	modules.load_end = modules.data_end;
	modules.active = TRUE;
	n_modules = 1;

	if (cflag)
		get_textseg(elf, fd);
}

/*
 * Load the program text into memory for the -c option.
 *
 * Walks the program headers and, for every read-only PT_LOAD segment
 * whose file size exceeds the largest seen so far, replaces the
 * `textspace' buffer with that segment's file contents and records its
 * address and size in textbegin/textsize.  Program headers that cannot
 * be read are skipped.
 *
 * Exits if no text segment is found, if the segment does not fit in a
 * size_t, if the buffer cannot be allocated, or if the file cannot be
 * read.
 */
static void
get_textseg(Elf *elf, int fd)
{
	GElf_Ehdr ehdr;
	GElf_Phdr phdr;
	GElf_Half i;

	if (gelf_getehdr(elf, &ehdr) == NULL)
		fatal_error("can't read ehdr");

	for (i = 0; i < ehdr.e_phnum; i++) {
		size_t chk;

		if (gelf_getphdr(elf, i, &phdr) == NULL)
			continue;

		/*
		 * Only loadable, non-writable segments hold program text.
		 * Other read-only program headers (PT_NOTE, PT_INTERP, ...)
		 * must not be mistaken for it.  We could have multiple
		 * loadable text segments; keep the largest we find.
		 */
		if (phdr.p_type != PT_LOAD || (phdr.p_flags & PF_W) ||
		    phdr.p_filesz <= textsize)
			continue;

		/* Drop the previous, smaller candidate (free(NULL) is ok) */
		free(textspace);
		textspace = NULL;

		/*
		 * gprof is a 32-bit program;  if this text segment
		 * has a > 32-bit offset or length, it's too big.
		 */
		chk = (size_t)phdr.p_vaddr + (size_t)phdr.p_filesz;
		if (phdr.p_vaddr + phdr.p_filesz != (GElf_Xword)chk)
			fatal_error("text segment too large for -c");

		textbegin = (size_t)phdr.p_vaddr;
		textsize = (size_t)phdr.p_filesz;

		/* read() below writes into this buffer, so it must exist */
		if ((textspace = malloc(textsize)) == NULL) {
			(void) fprintf(stderr, "%s: can't malloc %lu bytes\n",
			    whoami, (unsigned long)textsize);
			exit(EX_UNAVAILABLE);
		}

		if (lseek(fd, (off_t)phdr.p_offset, SEEK_SET) !=
		    (off_t)phdr.p_offset)
			fatal_error("cannot seek to text section");

		if (read(fd, textspace, textsize) != textsize)
			fatal_error("cannot read text");
	}

	if (textsize == 0)
		fatal_error("can't find text segment");
}

#ifdef DEBUG
/*
 * Trace helper for remove_dup_syms(): print the name of the symbol
 * being discarded (louser) and of the symbol kept in its place (keeper).
 */
static void
debug_dup_del(nltype *keeper, nltype *louser)
{
	const char	*dropped = louser->name;
	const char	*kept = keeper->name;

	(void) printf("remove_dup_syms: discarding sym %s over sym %s\n",
	    dropped, kept);
}
#endif /* DEBUG */

/*
 * Collapse runs of symbols that share the same value into one entry.
 *
 * `nl' must already be sorted by ascending value and holds *sym_count
 * entries.  The list is compacted in place and *sym_count is updated
 * to the number of entries kept.  Within a run of equal values the
 * keeper is chosen as follows:
 *   - identical syminfo: keep the earlier symbol;
 *   - same type: keep the earlier one if the later is weak, otherwise
 *     the later one;
 *   - different types: keep the earlier one unless it is STT_NOTYPE.
 *
 * Exits with EX_UNAVAILABLE if the scratch copy cannot be allocated.
 */
static void
remove_dup_syms(nltype *nl, sztype *sym_count)
{
	int	i;
	int	index;
	int	nextsym;
	nltype	*orig_list;

	/* An empty list has no duplicates and no entry 0 to inspect */
	if (*sym_count == 0)
		return;

	if ((orig_list = malloc(sizeof (nltype) * *sym_count)) == NULL) {
		(void) fprintf(stderr,
		    "gprof: remove_dup_syms: malloc failed\n");
		(void) fprintf(stderr, "Exiting due to error(s)...\n");
		exit(EX_UNAVAILABLE);
	}
	(void) memcpy(orig_list, nl, sizeof (nltype) * *sym_count);

	/*
	 * i is the current keeper in orig_list, nextsym the candidate
	 * being compared against it, index the next free slot in nl.
	 */
	for (i = 0, index = 0, nextsym = 1; nextsym < *sym_count; nextsym++) {
		int	i_type;
		int	n_bind;
		int	n_type;

		/*
		 * If orig_list[nextsym] points to a new symvalue, then we
		 * will copy our keeper and move on to the next symbol.
		 */
		if ((orig_list + i)->value < (orig_list + nextsym)->value) {
			*(nl + index++) = *(orig_list + i);
			i = nextsym;
			continue;
		}

		/*
		 * If these two symbols have the same info, then we
		 * keep the first and keep checking for dups.
		 */
		if ((orig_list + i)->syminfo ==
		    (orig_list + nextsym)->syminfo) {
			DEBUG_DUP_DEL(orig_list + i, orig_list + nextsym);
			continue;
		}
		n_bind = ELF32_ST_BIND((orig_list + nextsym)->syminfo);
		i_type = ELF32_ST_TYPE((orig_list + i)->syminfo);
		n_type = ELF32_ST_TYPE((orig_list + nextsym)->syminfo);

		/*
		 * If they have the same type we take the stronger
		 * bound function.
		 */
		if (i_type == n_type) {
			if (n_bind == STB_WEAK) {
				DEBUG_DUP_DEL((orig_list + i),
				    (orig_list + nextsym));
				continue;
			}
			DEBUG_DUP_DEL((orig_list + nextsym),
			    (orig_list + i));
			i = nextsym;
			continue;
		}

		/*
		 * If the first symbol isn't of type NOTYPE then it must
		 * be the keeper.
		 */
		if (i_type != STT_NOTYPE) {
			DEBUG_DUP_DEL((orig_list + i),
			    (orig_list + nextsym));
			continue;
		}

		/*
		 * Throw away the first one and take the new
		 * symbol
		 */
		DEBUG_DUP_DEL((orig_list + nextsym), (orig_list + i));
		i = nextsym;
	}

	/*
	 * Emit the last keeper.  When nothing has been emitted yet
	 * (a single symbol, or every symbol shares one value) there is no
	 * previous entry to compare against, so emit it unconditionally
	 * instead of reading nl[-1].
	 */
	if (index == 0 ||
	    (orig_list + i)->value > (nl + index - 1)->value)
		*(nl + index++) = *(orig_list + i);

	/* The scratch copy is no longer needed */
	free(orig_list);

	*sym_count = index;
}

/*
 * qsort() comparison callback for nltype entries: orders two symbols
 * by ascending value.  Returns 1 if the first has the larger value,
 * -1 if it has the smaller value and 0 if both values are equal.
 */
static int
compare(const void *arg1, const void *arg2)
{
	const nltype *lhs = arg1;
	const nltype *rhs = arg2;

	if (lhs->value > rhs->value)
		return (1);
	if (lhs->value < rhs->value)
		return (-1);
	return (0);
}

/*
 * Decide whether `sym' should be treated as a function of this object.
 *
 * Returns 1 when the symbol is defined and either
 *   - has type STT_FUNC with global or weak binding, or local binding
 *     while aflag is not set; or
 *   - has type STT_NOTYPE, an acceptable binding (same rule as above),
 *     and lives in a section flagged SHF_EXECINSTR.
 * Returns 0 otherwise, including when the symbol's section header
 * cannot be obtained.
 */
static int
is_function(Elf *elf, GElf_Sym *sym)
{
	Elf_Scn *scn;
	GElf_Shdr shdr;

	/*
	 * With shared objects, it is possible we come across a function
	 * that's global, but is undefined. The definition is probably
	 * elsewhere, so we'll have to skip it as far as this object is
	 * concerned.
	 */
	if (sym->st_shndx == SHN_UNDEF)
		return (0);

	if (GELF_ST_TYPE(sym->st_info) == STT_FUNC) {
		if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL)
			return (1);

		if (GELF_ST_BIND(sym->st_info) == STB_WEAK)
			return (1);

		if (!aflag && GELF_ST_BIND(sym->st_info) == STB_LOCAL)
			return (1);
	}

	/*
	 * It's not a function; determine if it's in an executable section.
	 */
	if (GELF_ST_TYPE(sym->st_info) != STT_NOTYPE)
		return (0);

	/*
	 * If it isn't global, and it isn't weak, and it either isn't
	 * local or the "all flag" isn't set, then get out.
	 */
	if (GELF_ST_BIND(sym->st_info) != STB_GLOBAL &&
	    GELF_ST_BIND(sym->st_info) != STB_WEAK &&
	    (GELF_ST_BIND(sym->st_info) != STB_LOCAL || aflag))
		return (0);

	if (sym->st_shndx >= SHN_LORESERVE)
		return (0);

	/*
	 * A bad section index leaves us without a section header;
	 * treat the symbol as a non-function rather than testing
	 * uninitialized sh_flags.
	 */
	scn = elf_getscn(elf, sym->st_shndx);
	if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
		return (0);

	if (!(shdr.sh_flags & SHF_EXECINSTR))
		return (0);

	return (1);
}

/*
 * Build the name list for `module' from the symbol table of `elf'.
 *
 * Selects a symbol table (.symtab preferred, else .dynsym optionally
 * preceded by .SUNW_ldynsym), collects every symbol accepted by
 * is_function() plus the PRF_ETEXT symbol, appends the dummy
 * PRF_EXTSYM (only when PRF_ETEXT was seen) and PRF_MEMTERM entries,
 * sorts the result by value and removes duplicates.
 *
 * On return module->nl, module->npe and module->nname describe the
 * list, module->data_end holds the value of the PRF_END symbol if one
 * was seen, and total_names has been increased by the number of names
 * kept.  Any failure is fatal (fatal_error()).
 */
static void
get_symtab(Elf *elf, mod_info_t *module)
{
	Elf_Scn		*scn = NULL, *sym_pri = NULL, *sym_aux = NULL;
	GElf_Word	strndx = 0;
	sztype		nsyms, i;
	Elf_Data	*symdata_pri = NULL;
	Elf_Data	*symdata_aux = NULL;
	GElf_Xword	nsyms_pri = 0, nsyms_aux = 0;
	nltype		*etext = NULL;
	nltype		*l_nl, *l_npe;
	sztype		l_nname;
	extern sztype	total_names;
	int		symtab_found = 0;


	/*
	 * Scan the section headers looking for a symbol table. Our
	 * preference is to use .symtab, because it contains the full
	 * set of symbols. If we find it, we stop looking immediately
	 * and use it. In the absence of a .symtab section, we are
	 * willing to use the dynamic symbol table (.dynsym), possibly
	 * augmented by the .SUNW_ldynsym, which contains local symbols.
	 */
	while ((symtab_found == 0) && ((scn = elf_nextscn(elf, scn)) != NULL)) {
		GElf_Shdr shdr;
		GElf_Xword nent;

		if (gelf_getshdr(scn, &shdr) == NULL)
			continue;

		/*
		 * Number of entries in this section.  A malformed header
		 * with a zero entry size is treated as an empty table
		 * instead of dividing by zero.
		 */
		nent = (shdr.sh_entsize != 0) ?
		    shdr.sh_size / shdr.sh_entsize : 0;

		switch (shdr.sh_type) {
		case SHT_SYMTAB:
			nsyms_pri = nent;
			strndx = shdr.sh_link;
			sym_pri = scn;
			/* Throw away .SUNW_ldynsym. It is for .dynsym only */
			nsyms_aux = 0;
			sym_aux = NULL;
			/* We have found the best symbol table. Stop looking */
			symtab_found = 1;
			break;

		case SHT_DYNSYM:
			/* We will use .dynsym if no .symtab is found */
			nsyms_pri = nent;
			strndx = shdr.sh_link;
			sym_pri = scn;
			break;

		case SHT_SUNW_LDYNSYM:
			/* Auxiliary table, used with .dynsym */
			nsyms_aux = nent;
			sym_aux = scn;
			break;

		default:
			break;
		}
	}

	if (sym_pri == NULL || strndx == 0)
		fatal_error("can't find symbol table.\n");

	nsyms = (sztype)(nsyms_pri + nsyms_aux);
	if ((nsyms_pri + nsyms_aux) != (GElf_Xword)nsyms)
		fatal_error(
		    "32-bit gprof cannot handle more than 2^32 symbols");

	if ((symdata_pri = elf_getdata(sym_pri, NULL)) == NULL)
		fatal_error("can't read symbol data.\n");

	if ((sym_aux != NULL) &&
	    ((symdata_aux = elf_getdata(sym_aux, NULL)) == NULL))
		fatal_error("can't read .SUNW_ldynsym symbol data.\n");

	/* Room for every symbol plus the dummy entries added below */
	if ((l_nl = l_npe = (nltype *)calloc(nsyms + PRF_SYMCNT,
	    sizeof (nltype))) == NULL)
		fatal_error("cannot allocate symbol data.\n");

	/*
	 * Now we need to cruise through the symbol table eliminating
	 * all non-functions from consideration, and making strings
	 * real.
	 */
	l_nname = 0;

	for (i = 1; i < nsyms; i++) {
		GElf_Sym gsym;
		GElf_Sym *symp;
		char *name;

		/*
		 * Look up the symbol. In the case where we have a
		 * .SUNW_ldynsym/.dynsym pair, we treat them as a single
		 * logical table, with the data from .SUNW_ldynsym coming
		 * before the data in .dynsym.
		 */
		if (i >= nsyms_aux)
			symp = gelf_getsym(symdata_pri, i - nsyms_aux, &gsym);
		else
			symp = gelf_getsym(symdata_aux, i, &gsym);

		/* Skip entries libelf cannot hand back; gsym is unset */
		if (symp == NULL)
			continue;

		/* A symbol without a resolvable name is of no use to us */
		name = elf_strptr(elf, strndx, gsym.st_name);
		if (name == NULL)
			continue;

		/*
		 * We're interested in this symbol if it's a function or
		 * if it's the symbol "_etext"
		 */
		if (is_function(elf, &gsym) || strcmp(name, PRF_ETEXT) == 0) {

			l_npe->name = name;
			l_npe->value = gsym.st_value;
			l_npe->sz = gsym.st_size;
			l_npe->syminfo = gsym.st_info;
			l_npe->module = module;

			if (strcmp(name, PRF_ETEXT) == 0)
				etext = l_npe;

			if (lflag == TRUE &&
			    GELF_ST_BIND(gsym.st_info) == STB_LOCAL) {
				/*
				 * If the "locals only" flag is on, then
				 * we add the local symbols to the
				 * exclusion lists.
				 */
				addlist(Elist, name);
				addlist(elist, name);
			}
			DPRINTF("Index %lld:", l_nname);
			DPRINTF("\tValue: 0x%llx\t", l_npe->value);
			DPRINTF("Name: %s \n", l_npe->name);
			l_npe++;
			l_nname++;
		}

		if (strcmp(name, PRF_END) == 0)
			module->data_end = gsym.st_value;
	}

	if (l_npe == l_nl)
		fatal_error("no valid functions found");

	/*
	 * Finally, we need to construct some dummy entries.
	 */
	if (etext) {
		l_npe->name = PRF_EXTSYM;
		l_npe->value = etext->value + 1;
		l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
		l_npe->module = module;
		l_npe++;
		l_nname++;
	}

	l_npe->name = PRF_MEMTERM;
	l_npe->value = (pctype)-1;
	l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
	l_npe->module = module;
	l_npe++;
	l_nname++;

	/*
	 * We're almost done;  all we need to do is sort the symbols
	 * and then remove the duplicates.
	 */
	qsort(l_nl, (size_t)l_nname, sizeof (nltype), compare);
	remove_dup_syms(l_nl, &l_nname);

	/*
	 * NOTE(review): l_npe still marks the end of the list as it was
	 * before duplicates were removed, while l_nname is the reduced
	 * count -- confirm that users of module->npe expect this.
	 */
	module->nl = l_nl;
	module->npe = l_npe;
	module->nname = l_nname;

	total_names += l_nname;
}