/*
 * *****************************************************************************
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *****************************************************************************
 *
 * Definitions for program data.
 *
 */

#ifndef BC_LANG_H
#define BC_LANG_H

#include <stdbool.h>

// These have to come first to silence a warning on BC_C11 below.
#include <status.h>
#include <vector.h>
#include <num.h>

#if BC_C11
#include <assert.h>
#endif // BC_C11

/// The instructions for bytecode.
typedef enum BcInst
{
#if BC_ENABLED
	/// Postfix increment and decrement. Prefix are translated into
	/// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS.
	BC_INST_INC = 0,
	BC_INST_DEC,
#endif // BC_ENABLED

	/// Unary negation.
	BC_INST_NEG,

	/// Boolean not.
	BC_INST_BOOL_NOT,

#if BC_ENABLE_EXTRA_MATH
	/// Truncation operator.
	BC_INST_TRUNC,
#endif // BC_ENABLE_EXTRA_MATH

	/// These should be self-explanatory.
	BC_INST_POWER,
	BC_INST_MULTIPLY,
	BC_INST_DIVIDE,
	BC_INST_MODULUS,
	BC_INST_PLUS,
	BC_INST_MINUS,

#if BC_ENABLE_EXTRA_MATH
	/// Places operator.
	BC_INST_PLACES,

	/// Shift operators.
	BC_INST_LSHIFT,
	BC_INST_RSHIFT,
#endif // BC_ENABLE_EXTRA_MATH

	/// Comparison operators.
	BC_INST_REL_EQ,
	BC_INST_REL_LE,
	BC_INST_REL_GE,
	BC_INST_REL_NE,
	BC_INST_REL_LT,
	BC_INST_REL_GT,

	/// Boolean or and and.
	BC_INST_BOOL_OR,
	BC_INST_BOOL_AND,

#if BC_ENABLED
	/// Same as the normal operators, but assigment. So ^=, *=, /=, etc.
	BC_INST_ASSIGN_POWER,
	BC_INST_ASSIGN_MULTIPLY,
	BC_INST_ASSIGN_DIVIDE,
	BC_INST_ASSIGN_MODULUS,
	BC_INST_ASSIGN_PLUS,
	BC_INST_ASSIGN_MINUS,
#if BC_ENABLE_EXTRA_MATH
	/// Places and shift assignment operators.
	BC_INST_ASSIGN_PLACES,
	BC_INST_ASSIGN_LSHIFT,
	BC_INST_ASSIGN_RSHIFT,
#endif // BC_ENABLE_EXTRA_MATH

	/// Normal assignment.
	BC_INST_ASSIGN,

	/// bc and dc detect when the value from an assignment is not necessary.
	/// For example, a plain assignment statement means the value is never used.
	/// In those cases, we can get lots of performance back by not even creating
	/// a copy at all. In fact, it saves a copy, a push onto the results stack,
	/// a pop from the results stack, and a free. Definitely worth it to detect.
	BC_INST_ASSIGN_POWER_NO_VAL,
	BC_INST_ASSIGN_MULTIPLY_NO_VAL,
	BC_INST_ASSIGN_DIVIDE_NO_VAL,
	BC_INST_ASSIGN_MODULUS_NO_VAL,
	BC_INST_ASSIGN_PLUS_NO_VAL,
	BC_INST_ASSIGN_MINUS_NO_VAL,
#if BC_ENABLE_EXTRA_MATH
	/// Same as above.
	BC_INST_ASSIGN_PLACES_NO_VAL,
	BC_INST_ASSIGN_LSHIFT_NO_VAL,
	BC_INST_ASSIGN_RSHIFT_NO_VAL,
#endif // BC_ENABLE_EXTRA_MATH
#endif // BC_ENABLED

	/// Normal assignment that pushes no value on the stack.
	BC_INST_ASSIGN_NO_VAL,

	/// Push a constant onto the results stack.
	BC_INST_NUM,

	/// Push a variable onto the results stack.
	BC_INST_VAR,

	/// Push an array element onto the results stack.
	BC_INST_ARRAY_ELEM,

	/// Push an array onto the results stack. This is different from pushing an
	/// array *element* onto the results stack; it pushes a reference to the
	/// whole array. This is needed in bc for function arguments that are
	/// arrays. It is also needed for returning the length of an array.
	BC_INST_ARRAY,

	/// Push a zero or a one onto the stack. These are special cased because it
	/// does help performance, particularly for one since inc/dec operators
	/// use it.
	BC_INST_ZERO,
	BC_INST_ONE,

#if BC_ENABLED
	/// Push the last printed value onto the stack.
	BC_INST_LAST,
#endif // BC_ENABLED

	/// Push the value of any of the globals onto the stack.
	BC_INST_IBASE,
	BC_INST_OBASE,
	BC_INST_SCALE,

#if BC_ENABLE_EXTRA_MATH
	/// Push the value of the seed global onto the stack.
	BC_INST_SEED,
#endif // BC_ENABLE_EXTRA_MATH

	/// These are builtin functions.
	BC_INST_LENGTH,
	BC_INST_SCALE_FUNC,
	BC_INST_SQRT,
	BC_INST_ABS,
	BC_INST_IS_NUMBER,
	BC_INST_IS_STRING,

#if BC_ENABLE_EXTRA_MATH
	/// Another builtin function.
	BC_INST_IRAND,
#endif // BC_ENABLE_EXTRA_MATH

	/// Asciify.
	BC_INST_ASCIIFY,

	/// Another builtin function.
	BC_INST_READ,

#if BC_ENABLE_EXTRA_MATH
	/// Another builtin function.
	BC_INST_RAND,
#endif // BC_ENABLE_EXTRA_MATH

	/// Return the max for the various globals.
	BC_INST_MAXIBASE,
	BC_INST_MAXOBASE,
	BC_INST_MAXSCALE,
#if BC_ENABLE_EXTRA_MATH
	/// Return the max value returned by rand().
	BC_INST_MAXRAND,
#endif // BC_ENABLE_EXTRA_MATH

	/// bc line_length() builtin function.
	BC_INST_LINE_LENGTH,

#if BC_ENABLED

	/// bc global_stacks() builtin function.
	BC_INST_GLOBAL_STACKS,

#endif // BC_ENABLED

	/// bc leading_zero() builtin function.
	BC_INST_LEADING_ZERO,

	/// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc.
	/// dc uses this instruction to print, but not pop. That's valid in dc.
	/// However, in bc, it is *never* valid to print without popping. In bc,
	/// BC_INST_PRINT_POP is used to indicate when a string should be printed
	/// because of a print statement or whether it should be printed raw. The
	/// reason for this is because a print statement handles escaped characters.
	/// So BC_INST_PRINT_POP is for printing a string from a print statement,
	/// BC_INST_PRINT_STR is for printing a string by itself.
	///
	/// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints.
	///
	/// Oh, and BC_INST_STR pushes a string onto the results stack.
	BC_INST_PRINT,
	BC_INST_PRINT_POP,
	BC_INST_STR,
#if BC_ENABLED
	BC_INST_PRINT_STR,

	/// Jumps unconditionally.
	BC_INST_JUMP,

	/// Jumps if the top of the results stack is zero (condition failed). It
	/// turns out that we only want to jump when conditions fail to "skip" code.
	BC_INST_JUMP_ZERO,

	/// Call a function.
	BC_INST_CALL,

	/// Return the top of the stack to the caller.
	BC_INST_RET,

	/// Return 0 to the caller.
	BC_INST_RET0,

	/// Special return instruction for void functions.
	BC_INST_RET_VOID,

	/// Special halt instruction.
	BC_INST_HALT,
#endif // BC_ENABLED

	/// Pop an item off of the results stack.
	BC_INST_POP,

	/// Swaps the top two items on the results stack.
	BC_INST_SWAP,

	/// Modular exponentiation.
	BC_INST_MODEXP,

	/// Do divide and modulus at the same time.
	BC_INST_DIVMOD,

	/// Turns a number into a string and prints it.
	BC_INST_PRINT_STREAM,

#if DC_ENABLED

	/// dc extended registers command.
	BC_INST_EXTENDED_REGISTERS,

	/// dc's return; it pops an executing string off of the stack.
	BC_INST_POP_EXEC,

	/// Unconditionally execute a string.
	BC_INST_EXECUTE,

	/// Conditionally execute a string.
	BC_INST_EXEC_COND,

	/// Prints each item on the results stack, separated by newlines.
	BC_INST_PRINT_STACK,

	/// Pops everything off of the results stack.
	BC_INST_CLEAR_STACK,

	/// Pushes the current length of a register stack onto the results stack.
	BC_INST_REG_STACK_LEN,

	/// Pushes the current length of the results stack onto the results stack.
	BC_INST_STACK_LEN,

	/// Pushes a copy of the item on the top of the results stack onto the
	/// results stack.
	BC_INST_DUPLICATE,

	/// Copies the value in a register and pushes the copy onto the results
	/// stack.
	BC_INST_LOAD,

	/// Pops an item off of a register stack and pushes it onto the results
	/// stack.
	BC_INST_PUSH_VAR,

	/// Pops an item off of the results stack and pushes it onto a register's
	/// stack.
	BC_INST_PUSH_TO_VAR,

	/// Quit.
	BC_INST_QUIT,

	/// Quit executing some number of strings.
	BC_INST_NQUIT,

	/// Push the depth of the execution stack onto the stack.
	BC_INST_EXEC_STACK_LEN,

#endif // DC_ENABLED

	/// Invalid instruction.
	BC_INST_INVALID,

} BcInst;

#if BC_C11
_Static_assert(BC_INST_INVALID <= UCHAR_MAX,
               "Too many instructions to fit into an unsigned char");
#endif // BC_C11

/// Used by maps to identify where items are in the array.
typedef struct BcId
{
	/// The name of the item.
	char* name;

	/// The index into the array where the item is.
	size_t idx;

} BcId;

/// The location of a var, array, or array element.
typedef struct BcLoc
{
	/// The index of the var or array.
	size_t loc;

	/// The index of the array or variable in the array stack. This is to
	/// prevent a bug with getting the wrong array element or variable after a
	/// function call. See the tests/bc/scripts/array.bc test for the array
	/// case; the variable case is in various variable tests.
	size_t stack_idx;

	/// The index of the array element. Only used for array elements.
	size_t idx;

} BcLoc;

/// An entry for a constant.
typedef struct BcConst
{
	/// The original string as parsed from the source code.
	char* val;

	/// The last base that the constant was parsed in.
	BcBigDig base;

	/// The parsed constant.
	BcNum num;

} BcConst;

/// A function. This is also used in dc, not just bc. The reason is that strings
/// are executed in dc, and they are converted to functions in order to be
/// executed.
typedef struct BcFunc
{
	/// The bytecode instructions.
	BcVec code;

#if BC_ENABLED

	/// The labels. This is a vector of indices. The index is the index into
	/// the bytecode vector where the label is.
	BcVec labels;

	/// The autos for the function. The first items are the parameters, and the
	/// arguments to the parameters must match the types in this vector.
	BcVec autos;

	/// The number of parameters the function takes.
	size_t nparams;

#endif // BC_ENABLED

	/// The function's name.
	const char* name;

#if BC_ENABLED
	/// True if the function is a void function.
	bool voidfn;
#endif // BC_ENABLED

} BcFunc;

/// Types of results that can be pushed onto the results stack.
typedef enum BcResultType
{
	/// Result is a variable.
	BC_RESULT_VAR,

	/// Result is an array element.
	BC_RESULT_ARRAY_ELEM,

	/// Result is an array. This is only allowed for function arguments or
	/// returning the length of the array.
	BC_RESULT_ARRAY,

	/// Result is a string.
	BC_RESULT_STR,

	/// Result is a temporary. This is used for the result of almost all
	/// expressions.
	BC_RESULT_TEMP,

	/// Special casing the two below gave performance improvements.

	/// Result is a 0.
	BC_RESULT_ZERO,

	/// Result is a 1. Useful for inc/dec operators.
	BC_RESULT_ONE,

#if BC_ENABLED

	/// Result is the special "last" variable.
	BC_RESULT_LAST,

	/// Result is the return value of a void function.
	BC_RESULT_VOID,
#endif // BC_ENABLED

	/// Result is the value of ibase.
	BC_RESULT_IBASE,

	/// Result is the value of obase.
	BC_RESULT_OBASE,

	/// Result is the value of scale.
	BC_RESULT_SCALE,

#if BC_ENABLE_EXTRA_MATH

	/// Result is the value of seed.
	BC_RESULT_SEED,

#endif // BC_ENABLE_EXTRA_MATH

} BcResultType;

/// A union to store data for various result types.
typedef union BcResultData
{
	/// A number. Strings are stored here too; they are numbers with
	/// cap == 0 && num == NULL. The string's index into the strings vector is
	/// stored in the scale field. But this is only used for strings stored in
	/// variables.
	BcNum n;

	/// A vector.
	BcVec v;

	/// A variable, array, or array element reference. This could also be a
	/// string if a string is not stored in a variable (dc only).
	BcLoc loc;

} BcResultData;

/// A tagged union for results.
typedef struct BcResult
{
	/// The tag. The type of the result.
	BcResultType t;

	/// The data. The data for the result.
	BcResultData d;

} BcResult;

/// An instruction pointer. This is how bc knows where in the bytecode vector,
/// and which function, the current execution is.
typedef struct BcInstPtr
{
	/// The index of the currently executing function in the fns vector.
	size_t func;

	/// The index into the bytecode vector of the *next* instruction.
	size_t idx;

	/// The length of the results vector when this function started executing.
	/// This is mostly used for bc where functions should not affect the results
	/// of their callers.
	size_t len;

} BcInstPtr;

/// Types of identifiers.
typedef enum BcType
{
	/// Variable.
	BC_TYPE_VAR,

	/// Array.
	BC_TYPE_ARRAY,

#if BC_ENABLED

	/// Array reference.
	BC_TYPE_REF,

#endif // BC_ENABLED

} BcType;

#if BC_ENABLED
/// An auto variable in bc.
typedef struct BcAuto
{
	/// The index of the variable in the vars or arrs vectors.
	size_t idx;

	/// The type of the variable.
	BcType type;

} BcAuto;
#endif // BC_ENABLED

/// Forward declaration.
struct BcProgram;

/**
 * Initializes a function.
 * @param f     The function to initialize.
 * @param name  The name of the function. The string is assumed to be owned by
 *              some other entity.
 */
void
bc_func_init(BcFunc* f, const char* name);

/**
 * Inserts an auto into the function.
 * @param f     The function to insert into.
 * @param p     The program. This is to search for the variable or array name.
 * @param name  The name of the auto to insert.
 * @param type  The type of the auto.
 * @param line  The line in the source code where the insert happened. This is
 *              solely for error reporting.
 */
void
bc_func_insert(BcFunc* f, struct BcProgram* p, char* name, BcType type,
               size_t line);

/**
 * Resets a function in preparation for it to be reused. This can happen in bc
 * because it is a dynamic language and functions can be redefined.
 * @param f  The functio to reset.
 */
void
bc_func_reset(BcFunc* f);

#if BC_DEBUG
/**
 * Frees a function. This is a destructor. This is only used in debug builds
 * because all functions are freed at exit. We free them in debug builds to
 * check for memory leaks.
 * @param func  The function to free as a void pointer.
 */
void
bc_func_free(void* func);
#endif // BC_DEBUG

/**
 * Initializes an array, which is the array type in bc and dc source code. Since
 * variables and arrays are both arrays (see the development manual,
 * manuals/development.md#execution, for more information), the @a nums
 * parameter tells bc whether to initialize an array of numbers or an array of
 * arrays of numbers. If the latter, it does a recursive call with nums set to
 * true.
 * @param a     The array to initialize.
 * @param nums  True if the array should be for numbers, false if it should be
 *              for vectors.
 */
void
bc_array_init(BcVec* a, bool nums);

/**
 * Copies an array to another array. This is used to do pass arrays to functions
 * that do not take references to arrays. The arrays are passed entirely by
 * value, which means that they need to be copied.
 * @param d  The destination array.
 * @param s  The source array.
 */
void
bc_array_copy(BcVec* d, const BcVec* s);

/**
 * Frees a string stored in a function. This is a destructor.
 * @param string  The string to free as a void pointer.
 */
void
bc_string_free(void* string);

/**
 * Frees a constant stored in a function. This is a destructor.
 * @param constant  The constant to free as a void pointer.
 */
void
bc_const_free(void* constant);

/**
 * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by
 * clearing the BcNum in the union. This is to ensure that bc does not use
 * uninitialized data.
 * @param r  The result to clear.
 */
void
bc_result_clear(BcResult* r);

/**
 * Copies a result into another. This is done for things like duplicating the
 * top of the results stack or copying the result of an assignment to put back
 * on the results stack.
 * @param d    The destination result.
 * @param src  The source result.
 */
void
bc_result_copy(BcResult* d, BcResult* src);

/**
 * Frees a result. This is a destructor.
 * @param result  The result to free as a void pointer.
 */
void
bc_result_free(void* result);

/**
 * Expands an array to @a len. This can happen because in bc, you do not have to
 * explicitly initialize elements of an array. If you access an element that is
 * not initialized, the array is expanded to fit it, and all missing elements
 * are initialized to 0 if they are numbers, or arrays with one element of 0.
 * This function does that expansion.
 * @param a    The array to expand.
 * @param len  The length to expand to.
 */
void
bc_array_expand(BcVec* a, size_t len);

#if BC_ENABLED

/**
 * Returns non-zero if the bytecode instruction i is an assignment instruction.
 * @param i  The instruction to test.
 * @return   Non-zero if i is an assignment instruction, zero otherwise.
 */
#define BC_INST_IS_ASSIGN(i) \
	((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL)

/**
 * Returns true if the bytecode instruction @a i requires the value to be
 * returned for use.
 * @param i  The instruction to test.
 * @return   True if @a i requires the value to be returned for use, false
 *           otherwise.
 */
#define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN)

#else // BC_ENABLED

/**
 * Returns non-zero if the bytecode instruction i is an assignment instruction.
 * @param i  The instruction to test.
 * @return   Non-zero if i is an assignment instruction, zero otherwise.
 */
#define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL)

/**
 * Returns true if the bytecode instruction @a i requires the value to be
 * returned for use.
 * @param i  The instruction to test.
 * @return   True if @a i requires the value to be returned for use, false
 *           otherwise.
 */
#define BC_INST_USE_VAL(i) (false)

#endif // BC_ENABLED

#if BC_DEBUG_CODE
/// Reference to string names for all of the instructions. For debugging.
extern const char* bc_inst_names[];
#endif // BC_DEBUG_CODE

/// References to the names of the main and read functions.
extern const char bc_func_main[];
extern const char bc_func_read[];

#endif // BC_LANG_H