src/curves/prj_pt.c

/*
 *  Copyright (C) 2017 - This file is part of libecc project
 *
 *  Authors:
 *      Ryad BENADJILA <ryadbenadjila@gmail.com>
 *      Arnaud EBALARD <arnaud.ebalard@ssi.gouv.fr>
 *      Jean-Pierre FLORI <jean-pierre.flori@ssi.gouv.fr>
 *
 *  Contributors:
 *      Nicolas VIVET <nicolas.vivet@ssi.gouv.fr>
 *      Karim KHALFALLAH <karim.khalfallah@ssi.gouv.fr>
 *
 *  This software is licensed under a dual BSD and GPL v2 license.
 *  See LICENSE file at the root folder of the project.
 */
#include <libecc/curves/ec_shortw.h>
#include <libecc/curves/prj_pt.h>
#include <libecc/nn/nn_logical.h>
#include <libecc/nn/nn_add.h>
#include <libecc/nn/nn_rand.h>
#include <libecc/fp/fp_add.h>
#include <libecc/fp/fp_mul.h>
#include <libecc/fp/fp_montgomery.h>
#include <libecc/fp/fp_rand.h>

/*
 * Magic value stored in the 'magic' field of a projective point once it has
 * been set up (see prj_pt_init() and the import functions below). It is the
 * value prj_pt_check_initialized() compares the field against.
 */
#define PRJ_PT_MAGIC ((word_t)(0xe1cd70babb1d5afeULL))

/*
 * Verify that projective point 'in' has been properly initialized: the
 * pointer must not be NULL, the structure must carry the expected magic
 * value and the curve it refers to must itself be initialized.
 * Returns 0 on success, -1 on error.
 */
int prj_pt_check_initialized(prj_pt_src_t in)
{
	int ret;

	/* Reject a NULL pointer before looking at the magic field */
	MUST_HAVE((in != NULL), ret, err);
	MUST_HAVE((in->magic == PRJ_PT_MAGIC), ret, err);

	/* A point is only usable if its curve is */
	ret = ec_shortw_crv_check_initialized(in->crv);

err:
	return ret;
}

/*
 * Set up projective point structure 'in' on given (initialized) curve as the
 * point at infinity. The three coordinates are initialized in the field
 * context of the curve. The function returns 0 on success, -1 on error.
 */
int prj_pt_init(prj_pt_t in, ec_shortw_crv_src_t curve)
{
	int ret;

	ret = ec_shortw_crv_check_initialized(curve);
	EG(ret, err);
	MUST_HAVE((in != NULL), ret, err);

	/* X, Y and Z all live in the field the curve is defined over */
	ret = fp_init(&(in->X), curve->a.ctx);
	EG(ret, err);
	ret = fp_init(&(in->Y), curve->a.ctx);
	EG(ret, err);
	ret = fp_init(&(in->Z), curve->a.ctx);
	EG(ret, err);

	/* Only flag the point as initialized once every step succeeded */
	in->magic = PRJ_PT_MAGIC;
	in->crv = curve;

err:
	return ret;
}

/*
 * Build projective point 'in' on given curve out of the three provided
 * coordinates: the point is first initialized on the curve, then 'xcoord',
 * 'ycoord' and 'zcoord' are copied to its X, Y and Z fields. No on-curve
 * check is performed here. The function returns 0 on success, -1 on error.
 */
int prj_pt_init_from_coords(prj_pt_t in,
			     ec_shortw_crv_src_t curve,
			     fp_src_t xcoord, fp_src_t ycoord, fp_src_t zcoord)
{
	int ret;

	ret = prj_pt_init(in, curve);
	EG(ret, err);

	ret = fp_copy(&(in->X), xcoord);
	EG(ret, err);
	ret = fp_copy(&(in->Y), ycoord);
	EG(ret, err);
	ret = fp_copy(&(in->Z), zcoord);

err:
	return ret;
}

/*
 * Uninit given projective point structure. The function does not return
 * anything: when the point is NULL or has not been initialized first (bad
 * magic value or no curve attached), the call is silently a no-op.
 * Otherwise the curve reference and the magic value are cleared and the
 * three coordinates are uninitialized.
 */
void prj_pt_uninit(prj_pt_t in)
{
	/* Nothing to release for a NULL or non-initialized point */
	if((in == NULL) || (in->magic != PRJ_PT_MAGIC) || (in->crv == NULL)){
		return;
	}

	in->crv = NULL;
	in->magic = WORD(0);

	fp_uninit(&(in->X));
	fp_uninit(&(in->Y));
	fp_uninit(&(in->Z));
}

/*
 * Tell whether projective point 'in' is the point at infinity, i.e. whether
 * its last (Z) coordinate is 0. 'iszero' out parameter receives 1 in that
 * case and 0 otherwise. The function returns 0 on success, -1 on error; on
 * error, the value of 'iszero' is not meaningful.
 */
int prj_pt_iszero(prj_pt_src_t in, int *iszero)
{
	int ret;

	ret = prj_pt_check_initialized(in);
	EG(ret, err);

	/* Only Z decides: X and Y are not looked at */
	ret = fp_iszero(&(in->Z), iszero);

err:
	return ret;
}

/*
 * Turn the (already initialized) projective point 'out' into the point at
 * infinity, represented here as (X, Y, Z) = (0, 1, 0). The function returns
 * 0 on success, -1 on error.
 */
int prj_pt_zero(prj_pt_t out)
{
	int ret;

	ret = prj_pt_check_initialized(out);
	EG(ret, err);

	ret = fp_zero(&(out->X));
	EG(ret, err);
	ret = fp_one(&(out->Y));
	EG(ret, err);
	ret = fp_zero(&(out->Z));

err:
	return ret;
}

/*
 * Test whether projective point 'in' satisfies the equation of its curve.
 * 'on_curve' out parameter is set to 1 when it does and to 0 when it does
 * not. The function returns 0 on success, -1 on error, in which case
 * 'on_curve' is not meaningful.
 */
int prj_pt_is_on_curve(prj_pt_src_t in,  int *on_curve)
{
	int ret, cmp;

	/*
	 * The check relies on the projective equation of the curve:
	 *
	 *   Y**2 * Z = X**3 + a * X * Z**2 + b * Z**3
	 *
	 * 'rhs' accumulates the right hand side, 'lhs' the left hand side
	 * (it is also used as scratch space before that) and 'acc' the
	 * (a * X + b * Z) * Z**2 part of the right hand side.
	 */
	fp rhs, lhs, acc;
	rhs.magic = WORD(0);
	lhs.magic = WORD(0);
	acc.magic = WORD(0);

	ret = prj_pt_check_initialized(in);
	EG(ret, err);
	ret = ec_shortw_crv_check_initialized(in->crv);
	EG(ret, err);
	MUST_HAVE((on_curve != NULL), ret, err);

	ret = fp_init(&rhs, in->X.ctx);
	EG(ret, err);
	ret = fp_init(&lhs, in->X.ctx);
	EG(ret, err);
	ret = fp_init(&acc, in->X.ctx);
	EG(ret, err);

	/* rhs = X**3 */
	ret = fp_sqr(&rhs, &(in->X));
	EG(ret, err);
	ret = fp_mul(&rhs, &rhs, &(in->X));
	EG(ret, err);

	/* acc = (a * X + b * Z) * Z**2, with b * Z going through lhs */
	ret = fp_mul(&acc, &(in->X), &(in->crv->a));
	EG(ret, err);
	ret = fp_mul(&lhs, &(in->crv->b), &(in->Z));
	EG(ret, err);
	ret = fp_add(&acc, &acc, &lhs);
	EG(ret, err);
	ret = fp_mul(&acc, &acc, &(in->Z));
	EG(ret, err);
	ret = fp_mul(&acc, &acc, &(in->Z));
	EG(ret, err);

	/* rhs = X**3 + a * X * Z**2 + b * Z**3 */
	ret = fp_add(&rhs, &rhs, &acc);
	EG(ret, err);

	/* lhs = Y**2 * Z */
	ret = fp_sqr(&lhs, &(in->Y));
	EG(ret, err);
	ret = fp_mul(&lhs, &lhs, &(in->Z));
	EG(ret, err);

	/* The point is on the curve when both sides match */
	ret = fp_cmp(&rhs, &lhs, &cmp);
	EG(ret, err);

	*on_curve = (cmp == 0);

err:
	fp_uninit(&rhs);
	fp_uninit(&lhs);
	fp_uninit(&acc);

	return ret;
}

/*
 * Duplicate projective point 'in' into 'out'. 'out' does not need to be
 * initialized by the caller: the function initializes it on the curve of
 * 'in' before copying the three coordinates. The function returns 0 on
 * success, -1 on error.
 */
int prj_pt_copy(prj_pt_t out, prj_pt_src_t in)
{
	int ret;

	ret = prj_pt_check_initialized(in);
	EG(ret, err);

	/* out is (re)initialized on the curve of the source point */
	ret = prj_pt_init(out, in->crv);
	EG(ret, err);

	ret = fp_copy(&(out->X), &(in->X));
	EG(ret, err);
	ret = fp_copy(&(out->Y), &(in->Y));
	EG(ret, err);
	ret = fp_copy(&(out->Z), &(in->Z));

err:
	return ret;
}

/*
 * Compute the affine representation (X/Z, Y/Z) of projective point 'in' and
 * store it in 'out', which is initialized by the function. The point at
 * infinity has no affine representation: passing it to the function is
 * considered as an error. The function returns 0 on success, -1 on error.
 */
int prj_pt_to_aff(aff_pt_t out, prj_pt_src_t in)
{
	int ret, is_inf;

	ret = prj_pt_check_initialized(in);
	EG(ret, err);

	ret = prj_pt_iszero(in, &is_inf);
	EG(ret, err);
	MUST_HAVE((!is_inf), ret, err);

	ret = aff_pt_init(out, in->crv);
	EG(ret, err);

	/* out->x temporarily holds 1/Z, which both coordinates need */
	ret = fp_inv(&(out->x), &(in->Z));
	EG(ret, err);
	/* y = Y * (1/Z) must come first, as x is overwritten right after */
	ret = fp_mul(&(out->y), &(in->Y), &(out->x));
	EG(ret, err);
	/* x = X * (1/Z) */
	ret = fp_mul(&(out->x), &(in->X), &(out->x));

err:
	return ret;
}

/*
 * Compute the unique projective representation of 'in' having Z = 1, i.e.
 * (X/Z, Y/Z, 1), which is "equivalent" to the affine point. 'out' may be
 * the same structure as 'in'; when it is not, it is initialized by the
 * function. The point at infinity is rejected. The function returns 0 on
 * success, -1 on error.
 */
int prj_pt_unique(prj_pt_t out, prj_pt_src_t in)
{
	int ret, is_inf;

	ret = prj_pt_check_initialized(in);
	EG(ret, err);
	ret = prj_pt_iszero(in, &is_inf);
	EG(ret, err);
	MUST_HAVE((!is_inf), ret, err);

	if(out != in){
		/* Distinct structures: out->X can host 1/Z in the meantime */
		ret = prj_pt_init(out, in->crv);
		EG(ret, err);
		ret = fp_inv(&(out->X), &(in->Z));
		EG(ret, err);
		ret = fp_mul(&(out->Y), &(in->Y), &(out->X));
		EG(ret, err);
		ret = fp_mul(&(out->X), &(in->X), &(out->X));
		EG(ret, err);
		ret = fp_one(&(out->Z));
		EG(ret, err);
	}
	else{
		/*
		 * Aliasing case: 1/Z goes to a local temporary so that no
		 * coordinate of the point is overwritten while still needed.
		 */
		fp z_inv;
		z_inv.magic = WORD(0);

		ret = fp_init(&z_inv, (in->Z).ctx);
		EG(ret, err);
		ret = fp_inv(&z_inv, &(in->Z));
		EG(ret, err1);
		ret = fp_mul(&(out->Y), &(in->Y), &z_inv);
		EG(ret, err1);
		ret = fp_mul(&(out->X), &(in->X), &z_inv);
		EG(ret, err1);
		ret = fp_one(&(out->Z));
		EG(ret, err1);
err1:
		/* The temporary is released on success and on error */
		fp_uninit(&z_inv);
		EG(ret, err);
	}

err:
	return ret;
}

/*
 * Build in 'out' the projective representation (x, y, 1) of affine point
 * 'in'. 'out' is initialized by the function. An affine point which is not
 * on its curve is refused. The function returns 0 on success, -1 on error.
 */
int ec_shortw_aff_to_prj(prj_pt_t out, aff_pt_src_t in)
{
	int ret, on_curve;

	ret = aff_pt_check_initialized(in);
	EG(ret, err);

	/* Refuse to convert a point which does not belong to the curve */
	ret = aff_pt_is_on_curve(in, &on_curve);
	EG(ret, err);
	MUST_HAVE(on_curve, ret, err);

	ret = prj_pt_init(out, in->crv);
	EG(ret, err);

	/* X and Y are the affine coordinates ... */
	ret = fp_copy(&(out->X), &(in->x));
	EG(ret, err);
	ret = fp_copy(&(out->Y), &(in->y));
	EG(ret, err);

	/* ... and Z is set to 1 */
	ret = nn_one(&(out->Z).fp_val);

err:
	return ret;
}

/*
 * Compare projective points 'in1' and 'in2', which must be initialized and
 * defined on the same curve. On success, 'cmp' is set to the result of the
 * comparison (0 if in1 == in2, !0 if in1 != in2). The function returns 0 on
 * success, -1 on error; 'cmp' is left untouched on error.
 */
int prj_pt_cmp(prj_pt_src_t in1, prj_pt_src_t in2, int *cmp)
{
	fp X1, X2, Y1, Y2;
	int ret, x_cmp, y_cmp;
	X1.magic = X2.magic = Y1.magic = Y2.magic = WORD(0);

	MUST_HAVE((cmp != NULL), ret, err);
	ret = prj_pt_check_initialized(in1); EG(ret, err);
	ret = prj_pt_check_initialized(in2); EG(ret, err);

	MUST_HAVE((in1->crv == in2->crv), ret, err);

	ret = fp_init(&X1, (in1->X).ctx); EG(ret, err);
	ret = fp_init(&X2, (in2->X).ctx); EG(ret, err);
	ret = fp_init(&Y1, (in1->Y).ctx); EG(ret, err);
	ret = fp_init(&Y2, (in2->Y).ctx); EG(ret, err);

	/*
	 * The two representations are compared through cross
	 * multiplication by the Z coordinates (X1 * Z2 against X2 * Z1 and
	 * Y1 * Z2 against Y2 * Z1), which avoids any inversion.
	 *
	 * Montgomery multiplication is used as it is faster than
	 * usual multiplication and the spurious multiplicative
	 * factor does not matter.
	 *
	 * Each of the four products is computed exactly once.
	 */
	ret = fp_mul_monty(&X1, &(in1->X), &(in2->Z)); EG(ret, err);
	ret = fp_mul_monty(&X2, &(in2->X), &(in1->Z)); EG(ret, err);
	ret = fp_mul_monty(&Y1, &(in1->Y), &(in2->Z)); EG(ret, err);
	ret = fp_mul_monty(&Y2, &(in2->Y), &(in1->Z)); EG(ret, err);

	ret = fp_cmp(&X1, &X2, &x_cmp); EG(ret, err);
	ret = fp_cmp(&Y1, &Y2, &y_cmp); EG(ret, err);

	/* Points are equal only when both comparisons returned 0 */
	(*cmp) = (x_cmp | y_cmp);

err:
	/* Temporaries are released on both success and error paths */
	fp_uninit(&Y2);
	fp_uninit(&Y1);
	fp_uninit(&X2);
	fp_uninit(&X1);

	return ret;
}

/*
 * Internal helper comparing the X coordinates of 'in1' and 'in2' through
 * cross multiplication by the Z coordinates (X1 * Z2 against X2 * Z1). The
 * result of the field comparison is stored in 'cmp'.
 *
 * NOTE: the function assumes that upper layers have checked that in1 and in2
 * are initialized, and that cmp is not NULL.
 */
ATTRIBUTE_WARN_UNUSED_RET static inline int _prj_pt_eq_or_opp_X(prj_pt_src_t in1, prj_pt_src_t in2, int *cmp)
{
	fp x1z2, x2z1;
	int ret;

	x1z2.magic = WORD(0);
	x2z1.magic = WORD(0);

	ret = fp_init(&x1z2, (in1->X).ctx);
	EG(ret, err);
	ret = fp_init(&x2z1, (in2->X).ctx);
	EG(ret, err);

	/*
	 * Montgomery multiplication is used as it is faster than
	 * usual multiplication and the spurious multiplicative
	 * factor does not matter.
	 */
	ret = fp_mul_monty(&x1z2, &(in1->X), &(in2->Z));
	EG(ret, err);
	ret = fp_mul_monty(&x2z1, &(in2->X), &(in1->Z));
	EG(ret, err);

	ret = fp_cmp(&x1z2, &x2z1, cmp);

err:
	fp_uninit(&x1z2);
	fp_uninit(&x2z1);

	return ret;
}

/*
 * Internal helper testing whether the Y coordinates of 'in1' and 'in2' are
 * equal or opposite, after cross multiplication by the Z coordinates
 * (Y1 * Z2 against Y2 * Z1). The result of fp_eq_or_opp() on the two
 * products is stored in 'eq_or_opp'.
 *
 * NOTE: the function assumes that upper layers have checked that in1 and in2
 * are initialized, and that eq_or_opp is not NULL.
 */
ATTRIBUTE_WARN_UNUSED_RET static inline int _prj_pt_eq_or_opp_Y(prj_pt_src_t in1, prj_pt_src_t in2, int *eq_or_opp)
{
	fp y1z2, y2z1;
	int ret;

	y1z2.magic = WORD(0);
	y2z1.magic = WORD(0);

	ret = fp_init(&y1z2, (in1->Y).ctx);
	EG(ret, err);
	ret = fp_init(&y2z1, (in2->Y).ctx);
	EG(ret, err);

	/*
	 * Montgomery multiplication is used as it is faster than
	 * usual multiplication and the spurious multiplicative
	 * factor does not matter.
	 */
	ret = fp_mul_monty(&y1z2, &(in1->Y), &(in2->Z));
	EG(ret, err);
	ret = fp_mul_monty(&y2z1, &(in2->Y), &(in1->Z));
	EG(ret, err);

	ret = fp_eq_or_opp(&y1z2, &y2z1, eq_or_opp);

err:
	fp_uninit(&y1z2);
	fp_uninit(&y2z1);

	return ret;
}

 /*
 * Test whether projective points 'in1' and 'in2' (which must be on the same
 * curve) are equal or opposite. On success, 'eq_or_opp' out parameter
 * receives the result (1 if equal or opposite, 0 otherwise). The function
 * returns 0 on success, -1 on error, in which case 'eq_or_opp' is left
 * untouched.
 */
int prj_pt_eq_or_opp(prj_pt_src_t in1, prj_pt_src_t in2, int *eq_or_opp)
{
	int ret, x_cmp, y_eq_or_opp;

	ret = prj_pt_check_initialized(in1);
	EG(ret, err);
	ret = prj_pt_check_initialized(in2);
	EG(ret, err);
	MUST_HAVE((in1->crv == in2->crv), ret, err);
	MUST_HAVE((eq_or_opp != NULL), ret, err);

	/* X coordinates must match ... */
	ret = _prj_pt_eq_or_opp_X(in1, in2, &x_cmp);
	EG(ret, err);
	/* ... while Y coordinates may either match or be opposite */
	ret = _prj_pt_eq_or_opp_Y(in1, in2, &y_eq_or_opp);
	EG(ret, err);

	(*eq_or_opp) = ((x_cmp == 0) & y_eq_or_opp);

err:
	return ret;
}

/*
 * Compute the opposite of projective point 'in', i.e. (X, -Y, Z), and store
 * it in 'out'. Aliasing is supported: 'out' may be the same structure as
 * 'in', in which case the point is negated in place. When 'out' is a
 * distinct structure, it is initialized by the function.
 *
 * Returns 0 on success, -1 on failure.
 */
int prj_pt_neg(prj_pt_t out, prj_pt_src_t in)
{
	int ret;

	ret = prj_pt_check_initialized(in); EG(ret, err);

	if (out != in) {
		/*
		 * Copy point if not aliased. prj_pt_copy() initializes
		 * 'out' on the curve of 'in' by itself, so no separate
		 * call to prj_pt_init() is needed beforehand.
		 */
		ret = prj_pt_copy(out, in); EG(ret, err);
	}

	/* Then, negate Y in place */
	ret = fp_neg(&(out->Y), &(out->Y));

err:
	return ret;
}

/*
 * Import a projective point from a buffer holding its three coordinates
 * X, Y and Z (elements of Fp) one after the other. Each coordinate is
 * encoded in big endian on p_len bytes, where p_len is the size of p in
 * bytes (e.g. 66 for a prime p of 521 bits). The size of the buffer must
 * exactly match 3 * p_len. The projective point is initialized by the
 * function and must be on the given curve, otherwise the import fails.
 *
 * The function returns 0 on success, -1 on error.
 */
int prj_pt_import_from_buf(prj_pt_t pt,
			   const u8 *pt_buf,
			   u16 pt_buf_len, ec_shortw_crv_src_t crv)
{
	fp_ctx_src_t ctx;
	u16 coord_len;
	int ret, on_curve;

	ret = ec_shortw_crv_check_initialized(crv);
	EG(ret, err);
	MUST_HAVE((pt_buf != NULL), ret, err);
	MUST_HAVE((pt != NULL), ret, err);

	/* Coordinates are elements of the field the curve is defined over */
	ctx = crv->a.ctx;
	coord_len = (u16)BYTECEIL(ctx->p_bitlen);
	MUST_HAVE((pt_buf_len == (3 * coord_len)), ret, err);

	/* X, then Y, then Z */
	ret = fp_init_from_buf(&(pt->X), ctx, pt_buf, coord_len);
	EG(ret, err);
	ret = fp_init_from_buf(&(pt->Y), ctx, pt_buf + coord_len, coord_len);
	EG(ret, err);
	ret = fp_init_from_buf(&(pt->Z), ctx, pt_buf + (2 * coord_len), coord_len);
	EG(ret, err);

	/* Attach the curve and mark the point as initialized */
	pt->crv = crv;
	pt->magic = PRJ_PT_MAGIC;

	/*
	 * The imported coordinates must describe a point of the provided
	 * curve. When they do not, the point is uninitialized again and an
	 * error is reported.
	 */
	ret = prj_pt_is_on_curve(pt, &on_curve);
	EG(ret, err);
	if (on_curve == 0){
		prj_pt_uninit(pt);
		ret = -1;
	}

err:
	PTR_NULLIFY(ctx);

	return ret;
}

/*
 * Import a projective point from an affine point buffer holding the two
 * coordinates x and y (elements of Fp) one after the other. Each coordinate
 * is encoded in big endian on p_len bytes, where p_len is the size of p in
 * bytes (e.g. 66 for a prime p of 521 bits). The size of the buffer must
 * exactly match 2 * p_len. The projective point is initialized by the
 * function with Z = 1 and must be on the given curve, otherwise the import
 * fails.
 *
 * The function returns 0 on success, -1 on error.
 */
int prj_pt_import_from_aff_buf(prj_pt_t pt,
			   const u8 *pt_buf,
			   u16 pt_buf_len, ec_shortw_crv_src_t crv)
{
	fp_ctx_src_t ctx;
	u16 coord_len;
	int ret, on_curve;

	ret = ec_shortw_crv_check_initialized(crv);
	EG(ret, err);
	MUST_HAVE((pt_buf != NULL), ret, err);
	MUST_HAVE((pt != NULL), ret, err);

	/* Coordinates are elements of the field the curve is defined over */
	ctx = crv->a.ctx;
	coord_len = (u16)BYTECEIL(ctx->p_bitlen);
	MUST_HAVE((pt_buf_len == (2 * coord_len)), ret, err);

	/* X and Y come from the buffer ... */
	ret = fp_init_from_buf(&(pt->X), ctx, pt_buf, coord_len);
	EG(ret, err);
	ret = fp_init_from_buf(&(pt->Y), ctx, pt_buf + coord_len, coord_len);
	EG(ret, err);
	/* ... while Z is forced to 1 */
	ret = fp_init(&(pt->Z), ctx);
	EG(ret, err);
	ret = fp_one(&(pt->Z));
	EG(ret, err);

	/* Attach the curve and mark the point as initialized */
	pt->crv = crv;
	pt->magic = PRJ_PT_MAGIC;

	/*
	 * The imported coordinates must describe a point of the provided
	 * curve. When they do not, the point is uninitialized again and an
	 * error is reported.
	 */
	ret = prj_pt_is_on_curve(pt, &on_curve);
	EG(ret, err);
	if (on_curve == 0){
		prj_pt_uninit(pt);
		ret = -1;
	}

err:
	PTR_NULLIFY(ctx);

	return ret;
}


/*
 * Export a projective point to a buffer as its three coordinates X, Y and Z
 * (elements of Fp) one after the other. Each coordinate is encoded in big
 * endian on p_len bytes, where p_len is the size of p in bytes (e.g. 66 for
 * a prime p of 521 bits). The size of the buffer must exactly match
 * 3 * p_len. A point which is not on its curve is not exported.
 *
 * The function returns 0 on success, -1 on error.
 */
int prj_pt_export_to_buf(prj_pt_src_t pt, u8 *pt_buf, u32 pt_buf_len)
{
	int ret, on_curve;
	fp_ctx_src_t ctx;
	u16 coord_len;

	ret = prj_pt_check_initialized(pt);
	EG(ret, err);
	MUST_HAVE((pt_buf != NULL), ret, err);

	/* Refuse to export a point which does not belong to its curve */
	ret = prj_pt_is_on_curve(pt, &on_curve);
	EG(ret, err);
	MUST_HAVE((on_curve), ret, err);

	ctx = pt->crv->a.ctx;
	coord_len = (u16)BYTECEIL(ctx->p_bitlen);
	MUST_HAVE((pt_buf_len == (3 * coord_len)), ret, err);

	/* X, then Y, then Z, each on coord_len bytes */
	ret = fp_export_to_buf(pt_buf, coord_len, &(pt->X));
	EG(ret, err);
	ret = fp_export_to_buf(pt_buf + coord_len, coord_len, &(pt->Y));
	EG(ret, err);
	ret = fp_export_to_buf(pt_buf + (2 * coord_len), coord_len, &(pt->Z));

err:
	PTR_NULLIFY(ctx);

	return ret;
}

/*
 * Export a projective point to an affine point buffer: the point is first
 * converted to its affine representation, whose export is then delegated to
 * aff_pt_export_to_buf(). As documented for this function, the 2 coordinates
 * (elements of Fp) are each encoded in big endian on p_len bytes, where p_len
 * is the size of p in bytes (e.g. 66 for a prime p of 521 bits), and the size
 * of the buffer must exactly match 2 * p_len. A point which is not on its
 * curve is not exported.
 *
 * The function returns 0 on success, -1 on error.
 */
int prj_pt_export_to_aff_buf(prj_pt_src_t pt, u8 *pt_buf, u32 pt_buf_len)
{
	aff_pt aff;
	int ret, on_curve;

	aff.magic = WORD(0);

	ret = prj_pt_check_initialized(pt);
	EG(ret, err);
	MUST_HAVE((pt_buf != NULL), ret, err);

	/* Refuse to export a point which does not belong to its curve */
	ret = prj_pt_is_on_curve(pt, &on_curve);
	EG(ret, err);
	MUST_HAVE((on_curve), ret, err);

	/* Switch to the unique affine representation of the point ... */
	ret = prj_pt_to_aff(&aff, pt);
	EG(ret, err);

	/* ... and let the affine layer serialize it */
	ret = aff_pt_export_to_buf(&aff, pt_buf, pt_buf_len);

err:
	aff_pt_uninit(&aff);

	return ret;
}


#ifdef NO_USE_COMPLETE_FORMULAS

/*
 * Projective point doubling without complete formulas: out = 2 * in. This
 * version is only built when NO_USE_COMPLETE_FORMULAS is defined. The
 * sequence of operations is the "dbl-2007-bl" one, see
 * http://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective.html#doubling-dbl-2007-bl
 *
 * The function is an internal one: no check is performed on parameters,
 * this MUST be done by the caller:
 *
 *  - in is initialized
 *  - in and out must not be aliased
 *
 * All field operations go through the "_monty" variants of the Fp routines
 * and use the 'a_monty' field of the curve.
 *
 * The function will initialize 'out'. The function returns 0 on success, -1
 * on error.
 */
ATTRIBUTE_WARN_UNUSED_RET static int __prj_pt_dbl_monty_no_cf(prj_pt_t out, prj_pt_src_t in)
{
	fp XX, ZZ, w, s, ss, sss, R, RR, B, h;
	int ret;
	/*
	 * Magic values are cleared before anything can fail, presumably so
	 * that the fp_uninit() calls at 'err' label are harmless on
	 * temporaries which were never initialized (TODO confirm against
	 * fp_uninit()).
	 */
	XX.magic = ZZ.magic = w.magic = s.magic = WORD(0);
	ss.magic = sss.magic = R.magic = WORD(0);
	RR.magic = B.magic = h.magic = WORD(0);

	ret = prj_pt_init(out, in->crv); EG(ret, err);

	/* All temporaries live in the field of the curve */
	ret = fp_init(&XX, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&ZZ, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&w, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&s, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&ss, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&sss, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&R, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&RR, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&B, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&h, out->crv->a.ctx); EG(ret, err);

	/* XX = X1² */
	ret = fp_sqr_monty(&XX, &(in->X)); EG(ret, err);

	/* ZZ = Z1² */
	ret = fp_sqr_monty(&ZZ, &(in->Z)); EG(ret, err);

	/* w = a*ZZ+3*XX (3*XX is obtained with three additions) */
	ret = fp_mul_monty(&w, &(in->crv->a_monty), &ZZ); EG(ret, err);
	ret = fp_add_monty(&w, &w, &XX); EG(ret, err);
	ret = fp_add_monty(&w, &w, &XX); EG(ret, err);
	ret = fp_add_monty(&w, &w, &XX); EG(ret, err);

	/* s = 2*Y1*Z1 */
	ret = fp_mul_monty(&s, &(in->Y), &(in->Z)); EG(ret, err);
	ret = fp_add_monty(&s, &s, &s); EG(ret, err);

	/* ss = s² */
	ret = fp_sqr_monty(&ss, &s); EG(ret, err);

	/* sss = s*ss */
	ret = fp_mul_monty(&sss, &s, &ss); EG(ret, err);

	/* R = Y1*s */
	ret = fp_mul_monty(&R, &(in->Y), &s); EG(ret, err);

	/* RR = R² */
	ret = fp_sqr_monty(&RR, &R); EG(ret, err);

	/*
	 * B = (X1+R)²-XX-RR
	 * R is overwritten with X1+R here: its original value is not used
	 * anymore after RR has been computed.
	 */
	ret = fp_add_monty(&R, &R, &(in->X)); EG(ret, err);
	ret = fp_sqr_monty(&B, &R); EG(ret, err);
	ret = fp_sub_monty(&B, &B, &XX); EG(ret, err);
	ret = fp_sub_monty(&B, &B, &RR); EG(ret, err);

	/* h = w²-2*B */
	ret = fp_sqr_monty(&h, &w); EG(ret, err);
	ret = fp_sub_monty(&h, &h, &B); EG(ret, err);
	ret = fp_sub_monty(&h, &h, &B); EG(ret, err);

	/* X3 = h*s */
	ret = fp_mul_monty(&(out->X), &h, &s); EG(ret, err);

	/* Y3 = w*(B-h)-2*RR (B is overwritten with B-h) */
	ret = fp_sub_monty(&B, &B, &h); EG(ret, err);
	ret = fp_mul_monty(&(out->Y), &w, &B); EG(ret, err);
	ret = fp_sub_monty(&(out->Y), &(out->Y), &RR); EG(ret, err);
	ret = fp_sub_monty(&(out->Y), &(out->Y), &RR); EG(ret, err);

	/* Z3 = sss */
	ret = fp_copy(&(out->Z), &sss);

err:
	/* Reached on both success and error: release all the temporaries */
	fp_uninit(&XX);
	fp_uninit(&ZZ);
	fp_uninit(&w);
	fp_uninit(&s);
	fp_uninit(&ss);
	fp_uninit(&sss);
	fp_uninit(&R);
	fp_uninit(&RR);
	fp_uninit(&B);
	fp_uninit(&h);

	return ret;
}

/*
 * Projective point addition without complete formulas: out = in1 + in2.
 * This version is only built when NO_USE_COMPLETE_FORMULAS is defined. The
 * sequence of operations is the "add-1998-cmo-2" one, see
 * http://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective.html#addition-add-1998-cmo-2
 *
 * The function is an internal one: no check is performed on parameters,
 * this MUST be done by the caller:
 *
 *  - in1 and in2 are initialized
 *  - in1 and in2 are on the same curve
 *  - in1/in2 and out must not be aliased
 *  - in1 and in2 must not be equal, opposite or have identical value
 *
 * All field operations go through the "_monty" variants of the Fp routines.
 *
 * The function will initialize 'out'. The function returns 0 on success, -1
 * on error.
 */
ATTRIBUTE_WARN_UNUSED_RET static int ___prj_pt_add_monty_no_cf(prj_pt_t out,
							       prj_pt_src_t in1,
							       prj_pt_src_t in2)
{
	fp Y1Z2, X1Z2, Z1Z2, u, uu, v, vv, vvv, R, A;
	int ret;
	/*
	 * Magic values are cleared before anything can fail, presumably so
	 * that the fp_uninit() calls at 'err' label are harmless on
	 * temporaries which were never initialized (TODO confirm against
	 * fp_uninit()).
	 */
	Y1Z2.magic = X1Z2.magic = Z1Z2.magic = u.magic = uu.magic = v.magic = WORD(0);
	vv.magic = vvv.magic = R.magic = A.magic = WORD(0);

	ret = prj_pt_init(out, in1->crv); EG(ret, err);

	/* All temporaries live in the field of the curve */
	ret = fp_init(&Y1Z2, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&X1Z2, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&Z1Z2, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&u, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&uu, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&v, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&vv, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&vvv, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&R, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&A, out->crv->a.ctx); EG(ret, err);

	/* Y1Z2 = Y1*Z2 */
	ret = fp_mul_monty(&Y1Z2, &(in1->Y), &(in2->Z)); EG(ret, err);

	/* X1Z2 = X1*Z2 */
	ret = fp_mul_monty(&X1Z2, &(in1->X), &(in2->Z)); EG(ret, err);

	/* Z1Z2 = Z1*Z2 */
	ret = fp_mul_monty(&Z1Z2, &(in1->Z), &(in2->Z)); EG(ret, err);

	/* u = Y2*Z1-Y1Z2 */
	ret = fp_mul_monty(&u, &(in2->Y), &(in1->Z)); EG(ret, err);
	ret = fp_sub_monty(&u, &u, &Y1Z2); EG(ret, err);

	/* uu = u² */
	ret = fp_sqr_monty(&uu, &u); EG(ret, err);

	/* v = X2*Z1-X1Z2 */
	ret = fp_mul_monty(&v, &(in2->X), &(in1->Z)); EG(ret, err);
	ret = fp_sub_monty(&v, &v, &X1Z2); EG(ret, err);

	/* vv = v² */
	ret = fp_sqr_monty(&vv, &v); EG(ret, err);

	/* vvv = v*vv */
	ret = fp_mul_monty(&vvv, &v, &vv); EG(ret, err);

	/* R = vv*X1Z2 */
	ret = fp_mul_monty(&R, &vv, &X1Z2); EG(ret, err);

	/* A = uu*Z1Z2-vvv-2*R */
	ret = fp_mul_monty(&A, &uu, &Z1Z2); EG(ret, err);
	ret = fp_sub_monty(&A, &A, &vvv); EG(ret, err);
	ret = fp_sub_monty(&A, &A, &R); EG(ret, err);
	ret = fp_sub_monty(&A, &A, &R); EG(ret, err);

	/* X3 = v*A */
	ret = fp_mul_monty(&(out->X), &v, &A); EG(ret, err);

	/*
	 * Y3 = u*(R-A)-vvv*Y1Z2
	 * R is used as scratch space: it first receives R-A, then (once
	 * u*(R-A) has been stored in Y3) the vvv*Y1Z2 product.
	 */
	ret = fp_sub_monty(&R, &R, &A); EG(ret, err);
	ret = fp_mul_monty(&(out->Y), &u, &R); EG(ret, err);
	ret = fp_mul_monty(&R, &vvv, &Y1Z2); EG(ret, err);
	ret = fp_sub_monty(&(out->Y), &(out->Y), &R); EG(ret, err);

	/* Z3 = vvv*Z1Z2 */
	ret = fp_mul_monty(&(out->Z), &vvv, &Z1Z2);

err:
	/* Reached on both success and error: release all the temporaries */
	fp_uninit(&Y1Z2);
	fp_uninit(&X1Z2);
	fp_uninit(&Z1Z2);
	fp_uninit(&u);
	fp_uninit(&uu);
	fp_uninit(&v);
	fp_uninit(&vv);
	fp_uninit(&vvv);
	fp_uninit(&R);
	fp_uninit(&A);

	return ret;
}

/*
 * Wrapper around the addition w/o complete formulas, handling the cases the
 * basic implementation does not support: one of the inputs being the point
 * at infinity, and inputs being equal or opposite. 'in1' and 'in2' are
 * checked to be initialized and on the same curve. 'out' is initialized on
 * every path. Returns 0 on success, -1 on error.
 */
ATTRIBUTE_WARN_UNUSED_RET static int __prj_pt_add_monty_no_cf(prj_pt_t out, prj_pt_src_t in1, prj_pt_src_t in2)
{
	int ret, is_inf, same_or_opp, differ;

	ret = prj_pt_check_initialized(in1);
	EG(ret, err);
	ret = prj_pt_check_initialized(in2);
	EG(ret, err);
	MUST_HAVE((in1->crv == in2->crv), ret, err);

	/* in1 at infinity: the result is in2, whatever in2 is */
	ret = prj_pt_iszero(in1, &is_inf);
	EG(ret, err);
	if (is_inf) {
		ret = prj_pt_init(out, in2->crv);
		EG(ret, err);
		ret = prj_pt_copy(out, in2);
		goto err;
	}

	/* in1 not at infinity but in2 is: the result is in1 */
	ret = prj_pt_iszero(in2, &is_inf);
	EG(ret, err);
	if (is_inf) {
		ret = prj_pt_init(out, in1->crv);
		EG(ret, err);
		ret = prj_pt_copy(out, in1);
		goto err;
	}

	/*
	 * Neither in1, nor in2 at infinity.
	 *
	 * The following test which guarantees in1 and in2 are not
	 * equal or opposite needs to be rewritten because it
	 * has a *HUGE* impact on perf (ec_self_tests run on
	 * all test vectors takes 24 times as long with this
	 * enabled). The same exists in non monty version.
	 */
	ret = prj_pt_eq_or_opp(in1, in2, &same_or_opp);
	EG(ret, err);
	if (!same_or_opp) {
		/*
		 * in1 and in2 are neither 0, nor equal or opposite: the
		 * basic monty addition implementation w/o complete
		 * formulas can be used.
		 */
		ret = ___prj_pt_add_monty_no_cf(out, in1, in2);
		goto err;
	}

	/* in1 and in2 are either equal or opposite: tell which one it is */
	ret = prj_pt_cmp(in1, in2, &differ);
	EG(ret, err);
	if (differ == 0) {
		/* in1 == in2 => doubling w/o complete formulas */
		ret = __prj_pt_dbl_monty_no_cf(out, in1);
	} else {
		/* in1 == -in2 => the result is the point at infinity */
		ret = prj_pt_init(out, in1->crv);
		EG(ret, err);
		ret = prj_pt_zero(out);
	}

err:
	return ret;
}


#else /* NO_USE_COMPLETE_FORMULAS */


/*
 * Projective point doubling with complete formulas: out = 2 * in.
 *
 * If NO_USE_COMPLETE_FORMULAS flag is not defined, the doubling formulas from
 * Algorithm 3 of https://joostrenes.nl/publications/complete.pdf are used
 * (this function), otherwise
 * http://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective.html#doubling-dbl-2007-bl
 *
 * The function is an internal one: 'in' is not checked here. 'out' is
 * initialized by the function and written while 'in' is still being read,
 * so the two must not be aliased. All field operations go through the
 * "_monty" variants of the Fp routines and use the 'a_monty' and 'b3_monty'
 * fields of the curve (presumably a and 3*b in Montgomery representation -
 * TODO confirm against the curve structure).
 *
 * The function returns 0 on success, and the error value of the first
 * failing step otherwise.
 */
ATTRIBUTE_WARN_UNUSED_RET static int __prj_pt_dbl_monty_cf(prj_pt_t out, prj_pt_src_t in)
{
	fp t0, t1, t2, t3;
	int ret;
	/*
	 * Magic values are cleared before anything can fail, presumably so
	 * that the fp_uninit() calls at 'err' label are harmless on
	 * temporaries which were never initialized (TODO confirm against
	 * fp_uninit()).
	 */
	t0.magic = t1.magic = t2.magic = t3.magic = WORD(0);

	ret = prj_pt_init(out, in->crv); EG(ret, err);

	ret = fp_init(&t0, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&t1, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&t2, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&t3, out->crv->a.ctx); EG(ret, err);

	/* t0 = X*X, t1 = Y*Y, t2 = Z*Z, t3 = 2*X*Y */
	ret = fp_mul_monty(&t0, &in->X, &in->X); EG(ret, err);
	ret = fp_mul_monty(&t1, &in->Y, &in->Y); EG(ret, err);
	ret = fp_mul_monty(&t2, &in->Z, &in->Z); EG(ret, err);
	ret = fp_mul_monty(&t3, &in->X, &in->Y); EG(ret, err);
	ret = fp_add_monty(&t3, &t3, &t3); EG(ret, err);

	/* Z3 = 2*X*Z, X3 = a*Z3, Y3 = b3*t2 + X3 */
	ret = fp_mul_monty(&out->Z, &in->X, &in->Z); EG(ret, err);
	ret = fp_add_monty(&out->Z, &out->Z, &out->Z); EG(ret, err);
	ret = fp_mul_monty(&out->X, &in->crv->a_monty, &out->Z); EG(ret, err);
	ret = fp_mul_monty(&out->Y, &in->crv->b3_monty, &t2); EG(ret, err);
	ret = fp_add_monty(&out->Y, &out->X, &out->Y); EG(ret, err);

	/*
	 * X3 = t1 - Y3, Y3 = t1 + Y3, Y3 = X3*Y3, X3 = t3*X3, Z3 = b3*Z3
	 * (each step uses the values produced by the previous ones)
	 */
	ret = fp_sub_monty(&out->X, &t1, &out->Y); EG(ret, err);
	ret = fp_add_monty(&out->Y, &t1, &out->Y); EG(ret, err);
	ret = fp_mul_monty(&out->Y, &out->X, &out->Y); EG(ret, err);
	ret = fp_mul_monty(&out->X, &t3, &out->X); EG(ret, err);
	ret = fp_mul_monty(&out->Z, &in->crv->b3_monty, &out->Z); EG(ret, err);

	/* t2 = a*t2, t3 = a*(t0 - t2) + Z3, Z3 = 2*t0 */
	ret = fp_mul_monty(&t2, &in->crv->a_monty, &t2); EG(ret, err);
	ret = fp_sub_monty(&t3, &t0, &t2); EG(ret, err);
	ret = fp_mul_monty(&t3, &in->crv->a_monty, &t3); EG(ret, err);
	ret = fp_add_monty(&t3, &t3, &out->Z); EG(ret, err);
	ret = fp_add_monty(&out->Z, &t0, &t0); EG(ret, err);

	/* t0 = (Z3 + t0 + t2)*t3, Y3 = Y3 + t0, t2 = Y*Z */
	ret = fp_add_monty(&t0, &out->Z, &t0); EG(ret, err);
	ret = fp_add_monty(&t0, &t0, &t2); EG(ret, err);
	ret = fp_mul_monty(&t0, &t0, &t3); EG(ret, err);
	ret = fp_add_monty(&out->Y, &out->Y, &t0); EG(ret, err);
	ret = fp_mul_monty(&t2, &in->Y, &in->Z); EG(ret, err);

	/* t2 = 2*t2, t0 = t2*t3, X3 = X3 - t0, Z3 = t2*t1, then doubled */
	ret = fp_add_monty(&t2, &t2, &t2); EG(ret, err);
	ret = fp_mul_monty(&t0, &t2, &t3); EG(ret, err);
	ret = fp_sub_monty(&out->X, &out->X, &t0); EG(ret, err);
	ret = fp_mul_monty(&out->Z, &t2, &t1); EG(ret, err);
	ret = fp_add_monty(&out->Z, &out->Z, &out->Z); EG(ret, err);

	/* Second doubling of Z3, i.e. Z3 = 4*t2*t1 overall */
	ret = fp_add_monty(&out->Z, &out->Z, &out->Z);

err:
	/* Reached on both success and error: release all the temporaries */
	fp_uninit(&t0);
	fp_uninit(&t1);
	fp_uninit(&t2);
	fp_uninit(&t3);

	return ret;
}

/*
 * Projective point addition with complete formulas: out = in1 + in2.
 *
 * If NO_USE_COMPLETE_FORMULAS flag is not defined addition formulas from Algorithm 1
 * of https://joostrenes.nl/publications/complete.pdf are used, otherwise
 * http://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective.html#addition-add-1998-cmo-2
 */

/*
 * The function is an internal one: no check is performed on parameters,
 * this MUST be done by the caller:
 *
 *  - in1 and in2 are initialized
 *  - in1 and in2 are on the same curve
 *  - in1/in2 and out must not be aliased
 *  - in1 and in2 must not be an "exceptional" pair, i.e. (in1-in2) is not a point
 *  of order exactly 2
 *
 * All field operations go through the "_monty" variants of the Fp routines
 * and use the 'a_monty' and 'b3_monty' fields of the curve.
 *
 * The function will initialize 'out'. The function returns 0 on success, -1
 * on error. An error is notably returned when the computed result has both
 * its Y and Z coordinates equal to 0 (see the comment at the end of the
 * function).
 */
ATTRIBUTE_WARN_UNUSED_RET static int __prj_pt_add_monty_cf(prj_pt_t out,
							   prj_pt_src_t in1,
							   prj_pt_src_t in2)
{
	int cmp1, cmp2;
	fp t0, t1, t2, t3, t4, t5;
	int ret;
	t0.magic = t1.magic = t2.magic = WORD(0);
	t3.magic = t4.magic = t5.magic = WORD(0);

	ret = prj_pt_init(out, in1->crv); EG(ret, err);

	ret = fp_init(&t0, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&t1, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&t2, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&t3, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&t4, out->crv->a.ctx); EG(ret, err);
	ret = fp_init(&t5, out->crv->a.ctx); EG(ret, err);

	/* t0 = X1*X2, t1 = Y1*Y2, t2 = Z1*Z2, t3 = X1+Y1, t4 = X2+Y2 */
	ret = fp_mul_monty(&t0, &in1->X, &in2->X); EG(ret, err);
	ret = fp_mul_monty(&t1, &in1->Y, &in2->Y); EG(ret, err);
	ret = fp_mul_monty(&t2, &in1->Z, &in2->Z); EG(ret, err);
	ret = fp_add_monty(&t3, &in1->X, &in1->Y); EG(ret, err);
	ret = fp_add_monty(&t4, &in2->X, &in2->Y); EG(ret, err);

	/* t3 = t3*t4 - (t0+t1), t4 = X1+Z1, t5 = X2+Z2 */
	ret = fp_mul_monty(&t3, &t3, &t4); EG(ret, err);
	ret = fp_add_monty(&t4, &t0, &t1); EG(ret, err);
	ret = fp_sub_monty(&t3, &t3, &t4); EG(ret, err);
	ret = fp_add_monty(&t4, &in1->X, &in1->Z); EG(ret, err);
	ret = fp_add_monty(&t5, &in2->X, &in2->Z); EG(ret, err);

	/* t4 = t4*t5 - (t0+t2), t5 = Y1+Z1, X3 = Y2+Z2 */
	ret = fp_mul_monty(&t4, &t4, &t5); EG(ret, err);
	ret = fp_add_monty(&t5, &t0, &t2); EG(ret, err);
	ret = fp_sub_monty(&t4, &t4, &t5); EG(ret, err);
	ret = fp_add_monty(&t5, &in1->Y, &in1->Z); EG(ret, err);
	ret = fp_add_monty(&out->X, &in2->Y, &in2->Z); EG(ret, err);

	/* t5 = t5*X3 - (t1+t2), Z3 = a*t4, X3 = b3*t2 */
	ret = fp_mul_monty(&t5, &t5, &out->X); EG(ret, err);
	ret = fp_add_monty(&out->X, &t1, &t2); EG(ret, err);
	ret = fp_sub_monty(&t5, &t5, &out->X); EG(ret, err);
	ret = fp_mul_monty(&out->Z, &in1->crv->a_monty, &t4); EG(ret, err);
	ret = fp_mul_monty(&out->X, &in1->crv->b3_monty, &t2); EG(ret, err);

	/* Z3 = X3+Z3, X3 = t1-Z3, Z3 = t1+Z3, Y3 = X3*Z3, t1 = 2*t0 */
	ret = fp_add_monty(&out->Z, &out->X, &out->Z); EG(ret, err);
	ret = fp_sub_monty(&out->X, &t1, &out->Z); EG(ret, err);
	ret = fp_add_monty(&out->Z, &t1, &out->Z); EG(ret, err);
	ret = fp_mul_monty(&out->Y, &out->X, &out->Z); EG(ret, err);
	ret = fp_add_monty(&t1, &t0, &t0); EG(ret, err);

	/* t1 = 3*t0 + a*t2, t2 = a*t2, t4 = b3*t4, then t2 = t0 - t2 */
	ret = fp_add_monty(&t1, &t1, &t0); EG(ret, err);
	ret = fp_mul_monty(&t2, &in1->crv->a_monty, &t2); EG(ret, err);
	ret = fp_mul_monty(&t4, &in1->crv->b3_monty, &t4); EG(ret, err);
	ret = fp_add_monty(&t1, &t1, &t2); EG(ret, err);
	ret = fp_sub_monty(&t2, &t0, &t2); EG(ret, err);

	/* t2 = a*t2, t4 = t4+t2, Y3 = Y3 + t1*t4, t0 = t5*t4 */
	ret = fp_mul_monty(&t2, &in1->crv->a_monty, &t2); EG(ret, err);
	ret = fp_add_monty(&t4, &t4, &t2); EG(ret, err);
	ret = fp_mul_monty(&t0, &t1, &t4); EG(ret, err);
	ret = fp_add_monty(&out->Y, &out->Y, &t0); EG(ret, err);
	ret = fp_mul_monty(&t0, &t5, &t4); EG(ret, err);

	/* X3 = t3*X3 - t0, Z3 = t5*Z3 + t3*t1 */
	ret = fp_mul_monty(&out->X, &t3, &out->X); EG(ret, err);
	ret = fp_sub_monty(&out->X, &out->X, &t0); EG(ret, err);
	ret = fp_mul_monty(&t0, &t3, &t1); EG(ret, err);
	ret = fp_mul_monty(&out->Z, &t5, &out->Z); EG(ret, err);
	/*
	 * The result of this last addition must be checked like all the
	 * other steps: 'ret' is overwritten by the tests below, so a
	 * failure here would otherwise go unnoticed.
	 */
	ret = fp_add_monty(&out->Z, &out->Z, &t0); EG(ret, err);

	/* Check for "exceptional" pairs of input points with
	 * checking if Y = Z = 0 as output (see the Bosma-Lenstra
	 * article "Complete Systems of Two Addition Laws for
	 * Elliptic Curves"). This should only happen on composite
	 * order curves (i.e. not on prime order curves).
	 *
	 * In this case, we raise an error as the result is
	 * not sound. This should not happen in our nominal
	 * cases with regular signature and protocols, and if
	 * it happens this usually means that bad points have
	 * been injected.
	 *
	 * NOTE: if for some reasons you need to deal with
	 * all the possible pairs of points including these
	 * exceptional pairs of inputs with an order 2 difference,
	 * you should fallback to the incomplete formulas using the
	 * COMPLETE=0 compilation toggle. Beware that in this
	 * case, the library will be more sensitive to
	 * side-channel attacks.
	 */
	ret = fp_iszero(&(out->Z), &cmp1); EG(ret, err);
	ret = fp_iszero(&(out->Y), &cmp2); EG(ret, err);
	MUST_HAVE(!((cmp1) && (cmp2)), ret, err);

err:
	/* Reached on both success and error: release all the temporaries */
	fp_uninit(&t0);
	fp_uninit(&t1);
	fp_uninit(&t2);
	fp_uninit(&t3);
	fp_uninit(&t4);
	fp_uninit(&t5);

	return ret;
}
#endif  /* NO_USE_COMPLETE_FORMULAS */

/*
 * Internal function:
 *
 *  - not supporting aliasing,
 *  - requiring caller to check in parameter is initialized
 *
 * Depending on the library configuration, the function uses either the
 * complete formulas or the incomplete ones.
 */
static int _prj_pt_dbl_monty(prj_pt_t out, prj_pt_src_t in)
{
	int ret;
#ifdef NO_USE_COMPLETE_FORMULAS
	int in_is_inf;

	/*
	 * With the incomplete formulas, the point at infinity is filtered
	 * out here and never handed to the doubling routine.
	 */
	ret = prj_pt_iszero(in, &in_is_inf); EG(ret, err);
	if (!in_is_inf) {
		ret = __prj_pt_dbl_monty_no_cf(out, in);
		goto err;
	}

	/* Doubling the point at infinity: out is set to the point at infinity */
	ret = prj_pt_init(out, in->crv); EG(ret, err);
	ret = prj_pt_zero(out);
#else
	/* The complete formulas are called directly, with no special casing */
	ret = __prj_pt_dbl_monty_cf(out, in); EG(ret, err);
#endif

err:
	return ret;
}

/*
 * Internal version that performs in place doubling of given val,
 * by using a temporary copy. Sanity checks on parameters must
 * be done by caller.
 */
ATTRIBUTE_WARN_UNUSED_RET static int _prj_pt_dbl_monty_aliased(prj_pt_t val)
{
	int ret;
	prj_pt doubled;

	/* Clear the magic so that the temporary starts out flagged as not initialized */
	doubled.magic = WORD(0);

	/* Double into the temporary, then bring the result back into val */
	ret = _prj_pt_dbl_monty(&doubled, val); EG(ret, err);
	ret = prj_pt_copy(val, &doubled);

err:
	prj_pt_uninit(&doubled);

	return ret;
}

/*
 * Public function for projective point doubling. The function handles the init
 * check of 'in' parameter which must be guaranteed for internal functions.
 * 'out' parameter need not be initialized and can be aliased with 'in'
 * parameter.
 *
 * The function returns 0 on success, -1 on error.
 */
ATTRIBUTE_WARN_UNUSED_RET int prj_pt_dbl(prj_pt_t out, prj_pt_src_t in)
{
	int ret;

	/* The internal helpers expect 'in' to have been validated by the caller */
	ret = prj_pt_check_initialized(in); EG(ret, err);

	/* The copy-based helper is only needed when out and in are the same object */
	ret = (out != in) ? _prj_pt_dbl_monty(out, in)
			  : _prj_pt_dbl_monty_aliased(out);

err:
	return ret;
}

/*
 * Internal function:
 *
 *  - not supporting aliasing,
 *  - requiring caller to check in1 and in2 parameter
 *
 * Based on library configuration, the function either use complete formulas
 * or not.
 */
ATTRIBUTE_WARN_UNUSED_RET static inline int _prj_pt_add_monty(prj_pt_t out,
							      prj_pt_src_t in1,
							      prj_pt_src_t in2)
{
#ifdef NO_USE_COMPLETE_FORMULAS
	/* Complete formulas have been disabled at build time */
	return __prj_pt_add_monty_no_cf(out, in1, in2);
#else
	/* Default configuration: complete formulas */
	return __prj_pt_add_monty_cf(out, in1, in2);
#endif
}

/*
 * The function is an internal one that specifically handles aliasing. No check
 * is performed on parameters, this MUST be done by the caller:
 *
 *  - in1 and in2 are initialized
 *  - in1 and in2 are on the same curve
 *
 * The function will initialize 'out'. The function returns 0 on success, -1
 * on error.
 */
ATTRIBUTE_WARN_UNUSED_RET static int _prj_pt_add_monty_aliased(prj_pt_t out,
								prj_pt_src_t in1,
								prj_pt_src_t in2)
{
	prj_pt sum;
	int ret;

	/* Clear the magic so that the temporary starts out flagged as not initialized */
	sum.magic = WORD(0);

	/* Add into the temporary first as out aliases one of the operands */
	ret = _prj_pt_add_monty(&sum, in1, in2); EG(ret, err);
	ret = prj_pt_copy(out, &sum);

err:
	prj_pt_uninit(&sum);

	return ret;
}

/*
 * Public function for projective point addition. The function handles the
 * init checks of 'in1' and 'in2' parameters, along with the check they
 * use the same curve. This must be guaranteed for internal functions.
 * 'out' parameter need not be initialized and can be aliased with either
 * 'in1' or 'in2' parameter.
 *
 * The function returns 0 on success, -1 on error.
 */
int prj_pt_add(prj_pt_t out, prj_pt_src_t in1, prj_pt_src_t in2)
{
	int ret;

	/* Both operands must be initialized and live on the same curve */
	ret = prj_pt_check_initialized(in1); EG(ret, err);
	ret = prj_pt_check_initialized(in2); EG(ret, err);
	MUST_HAVE((in1->crv == in2->crv), ret, err);

	if ((out != in1) && (out != in2)) {
		/* No aliasing: the result can be written to out directly */
		ret = _prj_pt_add_monty(out, in1, in2);
	} else {
		/* out is one of the operands: go through a temporary copy */
		ret = _prj_pt_add_monty_aliased(out, in1, in2);
	}

err:
	return ret;
}

/*******************************************************************************/
/****** Scalar multiplication algorithms ***************************************/
/***********/
/*
 * The description below summarizes the following algorithms.
 *
 * Double-and-Add-Always and Montgomery Ladder masked using Itoh et al. anti-ADPA
 * (Address-bit DPA) countermeasure.
 * See "A Practical Countermeasure against Address-Bit Differential Power Analysis"
 * by Itoh, Izu and Takenaka for more information.
 *
 * NOTE: these masked variants of the Double-and-Add-Always and Montgomery Ladder algorithms
 * are used by default as Itoh et al. countermeasure has a very small impact on performance
 * and is inherently more robust against DPA. The only case where we use another variant is
 * for devices with low memory, as Itoh requires many temporary variables that consume a lot
 * of stack space.
 *
 * NOTE: the algorithms inherently depend on the MSB of the
 * scalar. In order to avoid leaking this MSB and fall into HNP (Hidden Number
 * Problem) issues, we use the trick described in https://eprint.iacr.org/2011/232.pdf
 * to have the MSB always set. However, since the scalar m might be less or bigger than
 * the order q of the curve, we distinguish three situations:
 *     - The scalar m is < q (the order), in this case we compute:
 *         -
 *        | m' = m + (2 * q) if [log(m + q)] == [log(q)],
 *        | m' = m + q otherwise.
 *         -
 *     - The scalar m is >= q and < q**2, in this case we compute:
 *         -
 *        | m' = m + (2 * (q**2)) if [log(m + (q**2))] == [log(q**2)],
 *        | m' = m + (q**2) otherwise.
 *         -
 *     - The scalar m is >= (q**2), in this case m == m'
 *
 *   => We only deal with 0 <= m < (q**2) using the countermeasure. When m >= (q**2),
 *      we stick with m' = m, accepting MSB issues (not much can be done in this case
 *      anyways). In the two first cases, Double-and-Add-Always and Montgomery Ladder are
 *      performed in constant time wrt the size of the scalar m.
 */
/***********/
/*
 * Internal point blinding function: as it is internal, in is supposed to be initialized and
 * aliasing is NOT supported.
 */
ATTRIBUTE_WARN_UNUSED_RET static int _blind_projective_point(prj_pt_t out, prj_pt_src_t in)
{
	int ret;

	/*
	 * out->Z doubles as storage for the random mask l until the very last
	 * multiplication (this spares a temporary on the stack), which rules
	 * out in == out.
	 */
	MUST_HAVE((in != out), ret, err);

	ret = prj_pt_init(out, in->crv); EG(ret, err);

	/* Draw the random mask l in Fp, parked in out->Z for now */
	ret = fp_get_random(&(out->Z), in->X.ctx); EG(ret, err);

	/*
	 * (X, Y, Z) => (l*X, l*Y, l*Z)
	 * Z comes last since writing it overwrites the mask.
	 */
	ret = fp_mul_monty(&(out->Y), &(in->Y), &(out->Z)); EG(ret, err);
	ret = fp_mul_monty(&(out->X), &(in->X), &(out->Z)); EG(ret, err);
	ret = fp_mul_monty(&(out->Z), &(in->Z), &(out->Z));

err:
	return ret;
}

/* If nothing is specified regarding the scalar multiplication algorithm, we use
 * the Montgomery Ladder. For the specific case of small stack devices, we release
 * some pressure on the stack by explicitly using Double and Add Always WITHOUT the
 * Itoh et al. countermeasure against A-DPA, as it consumes quite a lot of stack.
 */
#if defined(USE_SMALL_STACK) && defined(USE_MONTY_LADDER)
#error "Small stack is only compatible with USE_DOUBLE_ADD_ALWAYS while USE_MONTY_LADDER has been explicitly asked!"
#endif

#if defined(USE_SMALL_STACK)
#ifndef USE_DOUBLE_ADD_ALWAYS
#define USE_DOUBLE_ADD_ALWAYS
#endif
#endif

#if !defined(USE_DOUBLE_ADD_ALWAYS) && !defined(USE_MONTY_LADDER)
#define USE_MONTY_LADDER
#endif

#if defined(USE_DOUBLE_ADD_ALWAYS) && defined(USE_MONTY_LADDER)
#error "You can either choose USE_DOUBLE_ADD_ALWAYS or USE_MONTY_LADDER, not both!"
#endif

#if defined(USE_DOUBLE_ADD_ALWAYS) && !defined(USE_SMALL_STACK)
/*
 * Left to right Double-and-Add-Always scalar multiplication, masked with the
 * Itoh et al. address-bit countermeasure: computes out = m * in.
 *
 * Internal function: no sanity check is performed here on 'out', 'm' or 'in',
 * this is left to the caller. The result is written to 'out' with
 * prj_pt_copy() once the main loop is over.
 *
 * The scalar actually processed is m' (m_msb_fixed below), built from m so
 * that its MSB is set whenever m < q**2, q being the curve order.
 *
 * Errors of the point doublings and additions are accumulated in ret_ops and
 * only folded into the return value at the end, so that the loop does not
 * exit early on such a failure.
 *
 * Returns 0 on success, a non zero value on error.
 */
ATTRIBUTE_WARN_UNUSED_RET static int _prj_pt_mul_ltr_monty_dbl_add_always(prj_pt_t out, nn_src_t m, prj_pt_src_t in)
{
	/* We use Itoh et al. notations here for T and the random r */
	prj_pt T[3];
	bitcnt_t mlen;
	u8 mbit, rbit;
	/* Random for masking the Double and Add Always algorithm */
	nn r;
	/* The new scalar we will use with MSB fixed to 1 (noted m' above).
	 * This helps dealing with constant time.
	 */
	nn m_msb_fixed;
	nn_src_t curve_order;
	nn curve_order_square;
	int ret, ret_ops, cmp;
	/* Clear all the magics so that the cleanup at 'err' can be reached from anywhere */
	r.magic = m_msb_fixed.magic = curve_order_square.magic = WORD(0);
	T[0].magic = T[1].magic = T[2].magic = WORD(0);

	/* Compute m' from m depending on the rule described above */
	curve_order = &(in->crv->order);
	/* First compute q**2 */
	ret = nn_sqr(&curve_order_square, curve_order); EG(ret, err);
	/* Then compute m' depending on m size */
	ret = nn_cmp(m, curve_order, &cmp); EG(ret, err);
	if (cmp < 0){
		bitcnt_t msb_bit_len, order_bitlen;

		/* Case where m < q: m' = m + q, plus q again if the bit length did not grow */
		ret = nn_add(&m_msb_fixed, m, curve_order); EG(ret, err);
		ret = nn_bitlen(&m_msb_fixed, &msb_bit_len); EG(ret, err);
		ret = nn_bitlen(curve_order, &order_bitlen); EG(ret, err);
		ret = nn_cnd_add((msb_bit_len == order_bitlen), &m_msb_fixed,
				  &m_msb_fixed, curve_order); EG(ret, err);
	} else {
		ret = nn_cmp(m, &curve_order_square, &cmp); EG(ret, err);
		if (cmp < 0) {
			bitcnt_t msb_bit_len, curve_order_square_bitlen;

			/* Case where m >= q and m < (q**2): same rule with q**2 instead of q */
			ret = nn_add(&m_msb_fixed, m, &curve_order_square); EG(ret, err);
			ret = nn_bitlen(&m_msb_fixed, &msb_bit_len); EG(ret, err);
			ret = nn_bitlen(&curve_order_square, &curve_order_square_bitlen); EG(ret, err);
			ret = nn_cnd_add((msb_bit_len == curve_order_square_bitlen),
					&m_msb_fixed, &m_msb_fixed, &curve_order_square); EG(ret, err);
		} else {
			/* Case where m >= (q**2): m' = m */
			ret = nn_copy(&m_msb_fixed, m); EG(ret, err);
		}
	}
	ret = nn_bitlen(&m_msb_fixed, &mlen); EG(ret, err);
	MUST_HAVE(mlen != 0, ret, err);
	/* mlen is now the index of the most significant bit of m' */
	mlen--;

	/* Hide possible internal failures for double and add
	 * operations and perform the operation in constant time.
	 */
	ret_ops = 0;

	/* Get a random r with the same size of m_msb_fixed.
	 * NOTE: the length is explicitly cast to u16, as done for the same
	 * call in the Montgomery Ladder variant.
	 */
	ret = nn_get_random_len(&r, (u16)(m_msb_fixed.wlen * WORD_BYTES)); EG(ret, err);

	ret = nn_getbit(&r, mlen, &rbit); EG(ret, err);

	/* Initialize points */
	ret = prj_pt_init(&T[0], in->crv); EG(ret, err);
	ret = prj_pt_init(&T[1], in->crv); EG(ret, err);

	/*
	 * T[2] = R(P)
	 * Blind the point with projective coordinates
	 * (X, Y, Z) => (l*X, l*Y, l*Z)
	 */
	ret = _blind_projective_point(&T[2], in); EG(ret, err);

	/*  T[r[n-1]] = T[2] */
	ret = prj_pt_copy(&T[rbit], &T[2]); EG(ret, err);

	/* Main loop of Double and Add Always */
	while (mlen > 0) {
		u8 rbit_next;
		--mlen;
		/* rbit is r[i+1], and rbit_next is r[i] */
		ret = nn_getbit(&r, mlen, &rbit_next); EG(ret, err);

		/* mbit is m[i] */
		ret = nn_getbit(&m_msb_fixed, mlen, &mbit); EG(ret, err);

		/* Double: T[r[i+1]] = ECDBL(T[r[i+1]]) */
#ifndef NO_USE_COMPLETE_FORMULAS
		/*
		 * NOTE: in case of complete formulas, we use the
		 * addition for doubling, incurring a small performance hit
		 * for better SCA resistance.
		 */
		ret_ops |= prj_pt_add(&T[rbit], &T[rbit], &T[rbit]);
#else
		ret_ops |= prj_pt_dbl(&T[rbit], &T[rbit]);
#endif
		/* Add:  T[1-r[i+1]] = ECADD(T[r[i+1]],T[2]) */
		ret_ops |= prj_pt_add(&T[1-rbit], &T[rbit], &T[2]);

		/*
		 * T[r[i]] = T[d[i] ^ r[i+1]]
		 * NOTE: we use the low level nn_copy function here to avoid
		 * any possible leakage on operands with prj_pt_copy
		 */
		ret = nn_copy(&(T[rbit_next].X.fp_val), &(T[mbit ^ rbit].X.fp_val)); EG(ret, err);
		ret = nn_copy(&(T[rbit_next].Y.fp_val), &(T[mbit ^ rbit].Y.fp_val)); EG(ret, err);
		ret = nn_copy(&(T[rbit_next].Z.fp_val), &(T[mbit ^ rbit].Z.fp_val)); EG(ret, err);

		/* Update rbit */
		rbit = rbit_next;
	}
	/* Output: T[r[0]] */
	ret = prj_pt_copy(out, &T[rbit]); EG(ret, err);

	/* Take into consideration our double and add errors */
	ret |= ret_ops;

err:
	prj_pt_uninit(&T[0]);
	prj_pt_uninit(&T[1]);
	prj_pt_uninit(&T[2]);
	nn_uninit(&r);
	nn_uninit(&m_msb_fixed);
	nn_uninit(&curve_order_square);

	PTR_NULLIFY(curve_order);

	return ret;
}
#endif

#if defined(USE_DOUBLE_ADD_ALWAYS) && defined(USE_SMALL_STACK)
/* NOTE: in small stack case where we compile for low memory devices, we do not use Itoh et al. countermeasure
 * as it requires too much temporary space on the stack.
 */
/*
 * Small stack flavour of the left to right Double-and-Add-Always scalar
 * multiplication: computes out = m * in, using 'out' itself as the
 * accumulator and a single temporary point.
 *
 * Internal function: no sanity check is performed here on 'm' or 'in', and
 * 'out' must not alias 'in' (this is enforced by the blinding helper).
 *
 * The scalar actually processed is m' (m_msb_fixed below), built from m so
 * that its MSB is set whenever m < q**2, q being the curve order.
 *
 * Errors of the point doublings and additions are accumulated in ret_ops and
 * only folded into the return value at the end.
 *
 * Returns 0 on success, a non zero value on error.
 */
ATTRIBUTE_WARN_UNUSED_RET static int _prj_pt_mul_ltr_monty_dbl_add_always(prj_pt_t out, nn_src_t m, prj_pt_src_t in)
{
	int ret, ret_ops;

	/* Hide possible internal failures for double and add
	 * operations and perform the operation in constant time.
	 */
	ret_ops = 0;

	/*******************/
	{
		bitcnt_t mlen;
		u8 mbit;
		/* The new scalar we will use with MSB fixed to 1 (noted m' above).
		 * This helps dealing with constant time.
		 */
		nn m_msb_fixed;
		nn_src_t curve_order;
		int cmp;
		m_msb_fixed.magic = WORD(0);

		/* Blind the input point projective coordinates.
		 * NOTE: this is done only once m_msb_fixed.magic has been
		 * cleared, because the 'err' label below uninitializes
		 * m_msb_fixed: jumping there from outside of this block would
		 * skip the assignment above and hand an indeterminate
		 * structure to nn_uninit().
		 */
		ret = _blind_projective_point(out, in); EG(ret, err);

		{
			nn curve_order_square;
			curve_order_square.magic = WORD(0);

			/* Compute m' from m depending on the rule described above */
			curve_order = &(in->crv->order);
			/* First compute q**2 */
			ret = nn_sqr(&curve_order_square, curve_order); EG(ret, err1);
			/* Then compute m' depending on m size */
			ret = nn_cmp(m, curve_order, &cmp); EG(ret, err1);
			if (cmp < 0){
				bitcnt_t msb_bit_len, order_bitlen;

				/* Case where m < q */
				ret = nn_add(&m_msb_fixed, m, curve_order); EG(ret, err1);
				ret = nn_bitlen(&m_msb_fixed, &msb_bit_len); EG(ret, err1);
				ret = nn_bitlen(curve_order, &order_bitlen); EG(ret, err1);
				ret = nn_cnd_add((msb_bit_len == order_bitlen), &m_msb_fixed,
					  &m_msb_fixed, curve_order); EG(ret, err1);
			} else {
				ret = nn_cmp(m, &curve_order_square, &cmp); EG(ret, err1);
				if (cmp < 0) {
					bitcnt_t msb_bit_len, curve_order_square_bitlen;

					/* Case where m >= q and m < (q**2) */
					ret = nn_add(&m_msb_fixed, m, &curve_order_square); EG(ret, err1);
					ret = nn_bitlen(&m_msb_fixed, &msb_bit_len); EG(ret, err1);
					ret = nn_bitlen(&curve_order_square, &curve_order_square_bitlen); EG(ret, err1);
					ret = nn_cnd_add((msb_bit_len == curve_order_square_bitlen),
							&m_msb_fixed, &m_msb_fixed, &curve_order_square); EG(ret, err1);
				} else {
					/* Case where m >= (q**2) */
					ret = nn_copy(&m_msb_fixed, m); EG(ret, err1);
				}
			}
err1:
			/* q**2 is released before the main loop, then any error is propagated */
			nn_uninit(&curve_order_square); EG(ret, err);
		}

		ret = nn_bitlen(&m_msb_fixed, &mlen); EG(ret, err);
		MUST_HAVE((mlen != 0), ret, err);
		/* mlen is now the index of the most significant bit of m' */
		mlen--;

		{
			prj_pt dbl;
			dbl.magic = WORD(0);

			/* Initialize temporary point */
			ret = prj_pt_init(&dbl, in->crv); EG(ret, err2);

			/* Main loop of Double and Add Always */
			while (mlen > 0) {
				--mlen;
				/* mbit is m[i] */
				ret = nn_getbit(&m_msb_fixed, mlen, &mbit); EG(ret, err2);

#ifndef NO_USE_COMPLETE_FORMULAS
				/*
				 * NOTE: in case of complete formulas, we use the
				 * addition for doubling, incurring a small performance hit
				 * for better SCA resistance.
				 */
				ret_ops |= prj_pt_add(&dbl, out, out);
#else
				ret_ops |= prj_pt_dbl(&dbl, out);
#endif
				/* The addition is always performed, whatever the value of mbit */
				ret_ops |= prj_pt_add(out, &dbl, in);
				/* Swap: when mbit is 0, the doubled value is brought
				 * back to out (presumably nn_cnd_swap swaps its
				 * operands when its first argument is non zero).
				 */
				ret = nn_cnd_swap(!mbit, &(out->X.fp_val), &(dbl.X.fp_val)); EG(ret, err2);
				ret = nn_cnd_swap(!mbit, &(out->Y.fp_val), &(dbl.Y.fp_val)); EG(ret, err2);
				ret = nn_cnd_swap(!mbit, &(out->Z.fp_val), &(dbl.Z.fp_val)); EG(ret, err2);
			}
err2:
			prj_pt_uninit(&dbl); EG(ret, err);
		}

err:
		nn_uninit(&m_msb_fixed);

		PTR_NULLIFY(curve_order);
	}

	/* Take into consideration our double and add errors */
	ret |= ret_ops;

	return ret;
}
#endif


#ifdef USE_MONTY_LADDER
/*
 * Left to right Montgomery Ladder scalar multiplication, masked with the
 * Itoh et al. address-bit countermeasure: computes out = m * in.
 *
 * Internal function: no sanity check is performed here on 'out', 'm' or 'in',
 * this is left to the caller. The result is written to 'out' with
 * prj_pt_copy() once the main loop is over.
 *
 * The scalar actually processed is m' (m_msb_fixed below), built from m so
 * that its MSB is set whenever m < q**2, q being the curve order.
 *
 * Errors of the point doublings and additions are accumulated in ret_ops and
 * only folded into the return value at the end, so that the loop does not
 * exit early on such a failure.
 *
 * Returns 0 on success, a non zero value on error.
 */
ATTRIBUTE_WARN_UNUSED_RET static int _prj_pt_mul_ltr_monty_ladder(prj_pt_t out, nn_src_t m, prj_pt_src_t in)
{
	/* We use Itoh et al. notations here for T and the random r */
	prj_pt T[3];
	bitcnt_t mlen;
	u8 mbit, rbit;
	/* Random for masking the Montgomery Ladder algorithm */
	nn r;
	/* The new scalar we will use with MSB fixed to 1 (noted m' above).
	 * This helps dealing with constant time.
	 */
	nn m_msb_fixed;
	nn_src_t curve_order;
	nn curve_order_square;
	int ret, ret_ops, cmp;
	/* Clear all the magics so that the cleanup at 'err' can be reached from anywhere */
	r.magic = m_msb_fixed.magic = curve_order_square.magic = WORD(0);
	T[0].magic = T[1].magic = T[2].magic = WORD(0);

	/* Compute m' from m depending on the rule described above */
	curve_order = &(in->crv->order);

	/* First compute q**2 */
	ret = nn_sqr(&curve_order_square, curve_order); EG(ret, err);

	/* Then compute m' depending on m size */
	ret = nn_cmp(m, curve_order, &cmp); EG(ret, err);
	if (cmp < 0) {
		bitcnt_t msb_bit_len, order_bitlen;

		/* Case where m < q: m' = m + q, plus q again if the bit length did not grow */
		ret = nn_add(&m_msb_fixed, m, curve_order); EG(ret, err);
		ret = nn_bitlen(&m_msb_fixed, &msb_bit_len); EG(ret, err);
		ret = nn_bitlen(curve_order, &order_bitlen); EG(ret, err);
		ret = nn_cnd_add((msb_bit_len == order_bitlen), &m_msb_fixed,
				&m_msb_fixed, curve_order); EG(ret, err);
	} else {
		ret = nn_cmp(m, &curve_order_square, &cmp); EG(ret, err);
		if (cmp < 0) {
			bitcnt_t msb_bit_len, curve_order_square_bitlen;

			/* Case where m >= q and m < (q**2): same rule with q**2 instead of q */
			ret = nn_add(&m_msb_fixed, m, &curve_order_square); EG(ret, err);
			ret = nn_bitlen(&m_msb_fixed, &msb_bit_len); EG(ret, err);
			ret = nn_bitlen(&curve_order_square, &curve_order_square_bitlen); EG(ret, err);
			ret = nn_cnd_add((msb_bit_len == curve_order_square_bitlen),
					 &m_msb_fixed, &m_msb_fixed, &curve_order_square); EG(ret, err);
		} else {
			/* Case where m >= (q**2): m' = m */
			ret = nn_copy(&m_msb_fixed, m); EG(ret, err);
		}
	}

	ret = nn_bitlen(&m_msb_fixed, &mlen); EG(ret, err);
	MUST_HAVE((mlen != 0), ret, err);
	/* mlen is now the index of the most significant bit of m' */
	mlen--;

	/* Hide possible internal failures for double and add
	 * operations and perform the operation in constant time.
	 */
	ret_ops = 0;

	/* Get a random r with the same size of m_msb_fixed */
	ret = nn_get_random_len(&r, (u16)(m_msb_fixed.wlen * WORD_BYTES)); EG(ret, err);

	/* rbit starts as the bit of r at the index of the MSB of m' */
	ret = nn_getbit(&r, mlen, &rbit); EG(ret, err);

	/* Initialize points */
	ret = prj_pt_init(&T[0], in->crv); EG(ret, err);
	ret = prj_pt_init(&T[1], in->crv); EG(ret, err);
	ret = prj_pt_init(&T[2], in->crv); EG(ret, err);

	/* Initialize T[r[n-1]] to input point */
	/*
	 * Blind the point with projective coordinates
	 * (X, Y, Z) => (l*X, l*Y, l*Z)
	 */
	ret = _blind_projective_point(&T[rbit], in); EG(ret, err);

	/* Initialize T[1-r[n-1]] with ECDBL(T[r[n-1]])) */
#ifndef NO_USE_COMPLETE_FORMULAS
	/*
	 * NOTE: in case of complete formulas, we use the
	 * addition for doubling, incurring a small performance hit
	 * for better SCA resistance.
	 */
	ret_ops |= prj_pt_add(&T[1-rbit], &T[rbit], &T[rbit]);
#else
	ret_ops |= prj_pt_dbl(&T[1-rbit], &T[rbit]);
#endif

	/* Main loop of the Montgomery Ladder: one iteration per remaining bit of m' */
	while (mlen > 0) {
		u8 rbit_next;
		--mlen;
		/* rbit is r[i+1], and rbit_next is r[i] */
		ret = nn_getbit(&r, mlen, &rbit_next); EG(ret, err);

		/* mbit is m[i] */
		ret = nn_getbit(&m_msb_fixed, mlen, &mbit); EG(ret, err);
		/* Double: T[2] = ECDBL(T[d[i] ^ r[i+1]]) */

#ifndef NO_USE_COMPLETE_FORMULAS
		/* NOTE: in case of complete formulas, we use the
		 * addition for doubling, incurring a small performance hit
		 * for better SCA resistance.
		 */
		ret_ops |= prj_pt_add(&T[2], &T[mbit ^ rbit], &T[mbit ^ rbit]);
#else
		ret_ops |= prj_pt_dbl(&T[2], &T[mbit ^ rbit]);
#endif

		/* Add: T[1] = ECADD(T[0],T[1]) */
		ret_ops |= prj_pt_add(&T[1], &T[0], &T[1]);

		/* T[0] = T[2-(d[i] ^ r[i])] */
		/*
		 * NOTE: we use the low level nn_copy function here to avoid
		 * any possible leakage on operands with prj_pt_copy
		 */
		ret = nn_copy(&(T[0].X.fp_val), &(T[2-(mbit ^ rbit_next)].X.fp_val)); EG(ret, err);
		ret = nn_copy(&(T[0].Y.fp_val), &(T[2-(mbit ^ rbit_next)].Y.fp_val)); EG(ret, err);
		ret = nn_copy(&(T[0].Z.fp_val), &(T[2-(mbit ^ rbit_next)].Z.fp_val)); EG(ret, err);

		/* T[1] = T[1+(d[i] ^ r[i])] */
		/* NOTE: we use the low level nn_copy function here to avoid
		 * any possible leakage on operands with prj_pt_copy
		 */
		ret = nn_copy(&(T[1].X.fp_val), &(T[1+(mbit ^ rbit_next)].X.fp_val)); EG(ret, err);
		ret = nn_copy(&(T[1].Y.fp_val), &(T[1+(mbit ^ rbit_next)].Y.fp_val)); EG(ret, err);
		ret = nn_copy(&(T[1].Z.fp_val), &(T[1+(mbit ^ rbit_next)].Z.fp_val)); EG(ret, err);

		/* Update rbit */
		rbit = rbit_next;
	}
	/* Output: T[r[0]] */
	ret = prj_pt_copy(out, &T[rbit]); EG(ret, err);

	/* Take into consideration our double and add errors */
	ret |= ret_ops;

err:
	/* Common cleanup, run on both the success and the error paths */
	prj_pt_uninit(&T[0]);
	prj_pt_uninit(&T[1]);
	prj_pt_uninit(&T[2]);
	nn_uninit(&r);
	nn_uninit(&m_msb_fixed);
	nn_uninit(&curve_order_square);

	PTR_NULLIFY(curve_order);

	return ret;
}
#endif

/* Main projective scalar multiplication function.
 * Depending on the preprocessing options, we use either the
 * Double and Add Always algorithm, or the Montgomery Ladder one.
 */
ATTRIBUTE_WARN_UNUSED_RET static int _prj_pt_mul_ltr_monty(prj_pt_t out, nn_src_t m, prj_pt_src_t in)
{
	/* Compile time dispatch to the selected scalar multiplication algorithm */
#ifdef USE_DOUBLE_ADD_ALWAYS
	return _prj_pt_mul_ltr_monty_dbl_add_always(out, m, in);
#elif defined(USE_MONTY_LADDER)
	return _prj_pt_mul_ltr_monty_ladder(out, m, in);
#else
#error "Error: neither Double and Add Always nor Montgomery Ladder has been selected!"
#endif
}

/* Version handling 'out' aliased with 'in': the result is computed in a temporary point and then copied to 'out'. */
ATTRIBUTE_WARN_UNUSED_RET static int _prj_pt_mul_ltr_monty_aliased(prj_pt_t out, nn_src_t m, prj_pt_src_t in)
{
	int ret;
	prj_pt product;

	/* Clear the magic so that the temporary starts out flagged as not initialized */
	product.magic = WORD(0);

	/* Multiply into a temporary point, then copy the result to out */
	ret = prj_pt_init(&product, in->crv); EG(ret, err);
	ret = _prj_pt_mul_ltr_monty(&product, m, in); EG(ret, err);
	ret = prj_pt_copy(out, &product);

err:
	prj_pt_uninit(&product);

	return ret;
}

/* Aliased version. This is the public main interface of our
 * scalar multiplication algorithm. Checks that the input point
 * and that the output point are on the curve are performed here
 * (before and after calling the core algorithm, albeit Double and
 * Add Always or Montgomery Ladder).
 */
int prj_pt_mul(prj_pt_t out, nn_src_t m, prj_pt_src_t in)
{
	int on_curve, ret;

	ret = prj_pt_check_initialized(in); EG(ret, err);
	ret = nn_check_initialized(m); EG(ret, err);

	/* Refuse an input point that is not on its curve */
	MUST_HAVE((!prj_pt_is_on_curve(in, &on_curve)) && on_curve, ret, err);

	/* Only go through the temporary copy when out aliases in */
	if (out != in) {
		ret = _prj_pt_mul_ltr_monty(out, m, in);
	} else {
		ret = _prj_pt_mul_ltr_monty_aliased(out, m, in);
	}
	EG(ret, err);

	/* The result is also required to be on the curve */
	MUST_HAVE((!prj_pt_is_on_curve(out, &on_curve)) && on_curve, ret, err);

err:
	return ret;
}

int prj_pt_mul_blind(prj_pt_t out, nn_src_t m, prj_pt_src_t in)
{
	/*
	 * The scalar m is blinded as m + (b * q), with b random and q the
	 * curve order.
	 * NOTE: for the classical prime field curves, the curve order and
	 * the "generator" order are usually the same (cofactor = 1). There
	 * are exceptions though (e.g. Wei25519 and Wei448), and for these it
	 * is crucial to use the curve order so that the blinding is generic
	 * and works for any point on the curve.
	 */
	nn blinded;
	nn_src_t order;
	int ret;
	blinded.magic = WORD(0);

	ret = prj_pt_check_initialized(in); EG(ret, err);

	order = &(in->crv->order);

	/* blinded = b, a random value modulo the curve order */
	ret = nn_init(&blinded, 0); EG(ret, err);
	ret = nn_get_random_mod(&blinded, order); EG(ret, err);

	/* blinded = (b * q) + m */
	ret = nn_mul(&blinded, &blinded, order); EG(ret, err);
	ret = nn_add(&blinded, &blinded, m); EG(ret, err);

	/*
	 * Scalar multiplication with the blinded scalar.
	 * NOTE: the blinding of the point and the on curve checks of the
	 * input and output points are performed in the lower functions.
	 */
	ret = prj_pt_mul(out, &blinded, in);

err:
	nn_uninit(&blinded);

	PTR_NULLIFY(order);

	return ret;
}

/* Naive double and add scalar multiplication.
 *
 * This scalar multiplication is used on public values and is optimized with no
 * countermeasures, and it is usually faster as scalar can be small with few bits
 * to process (e.g. cofactors, etc.).
 *
 * out is initialized by the function.
 *
 * XXX: WARNING: this function must only be used on public points!
 *
 */
static int __prj_pt_unprotected_mult(prj_pt_t out, nn_src_t scalar, prj_pt_src_t public_in)
{
	u8 expbit;
	bitcnt_t explen;
	int ret, iszero, on_curve;

	ret = prj_pt_check_initialized(public_in); EG(ret, err);
	ret = nn_check_initialized(scalar); EG(ret, err);

	/* This function does not support aliasing */
	MUST_HAVE((out != public_in), ret, err);

	/* Check that the input is on the curve */
	MUST_HAVE((!prj_pt_is_on_curve(public_in, &on_curve)) && on_curve, ret, err);

	ret = nn_iszero(scalar, &iszero); EG(ret, err);
	/* Multiplication by zero is the point at infinity */
	if(iszero){
		/*
		 * 'out' is not required to be initialized by the caller, and
		 * this path does not go through prj_pt_copy() as the non
		 * zero scalar path does: initialize it explicitly on the
		 * input curve before setting it to the point at infinity.
		 */
		ret = prj_pt_init(out, public_in->crv); EG(ret, err);
		ret = prj_pt_zero(out);
		goto err;
	}

	ret = nn_bitlen(scalar, &explen); EG(ret, err);
	/* Sanity check */
	MUST_HAVE((explen > 0), ret, err);
	/* explen is now the index of the most significant bit of the scalar */
	explen = (bitcnt_t)(explen - 1);
	/* The most significant bit is consumed by starting from the input point */
	ret = prj_pt_copy(out, public_in); EG(ret, err);

	/*
	 * Plain left to right double and add on the remaining bits: the
	 * addition is only performed when the scalar bit is set, hence this
	 * is NOT constant time.
	 */
	while (explen > 0) {
		explen = (bitcnt_t)(explen - 1);
		ret = nn_getbit(scalar, explen, &expbit); EG(ret, err);
		ret = prj_pt_dbl(out, out); EG(ret, err);
		if(expbit){
			ret = prj_pt_add(out, out, public_in); EG(ret, err);
		}
	}

	/* Check that the output is on the curve */
	MUST_HAVE((!prj_pt_is_on_curve(out, &on_curve)) && on_curve, ret, err);

err:
	VAR_ZEROIFY(expbit);
	VAR_ZEROIFY(explen);

	return ret;
}

/* Aliased version of __prj_pt_unprotected_mult */
int _prj_pt_unprotected_mult(prj_pt_t out, nn_src_t scalar, prj_pt_src_t public_in)
{
	int ret;

	if(out != public_in){
		/* Distinct operands: the core routine can be called as is */
		ret = __prj_pt_unprotected_mult(out, scalar, public_in);
	}
	else{
		/*
		 * The core routine rejects aliasing: feed it a copy of the
		 * input point.
		 */
		prj_pt in_cpy;
		in_cpy.magic = WORD(0);

		ret = prj_pt_copy(&in_cpy, public_in); EG(ret, release);
		ret = __prj_pt_unprotected_mult(out, scalar, &in_cpy);
release:
		prj_pt_uninit(&in_cpy);
	}

	return ret;
}
/*
 * Check if an integer is (a multiple of) a projective point order.
 *
 * The function returns 0 on success, -1 on error. The value check is set to 1 if the projective
 * point has order in_isorder, 0 otherwise. The value is meaningless on error.
 */
int check_prj_pt_order(prj_pt_src_t in_shortw, nn_src_t in_isorder, prj_pt_sensitivity s, int *check)
{
	prj_pt product;
	int ret, is_inf;
	product.magic = WORD(0);

	/* Sanity checks on the parameters */
	ret = prj_pt_check_initialized(in_shortw); EG(ret, err);
	ret = nn_check_initialized(in_isorder); EG(ret, err);
	MUST_HAVE((check != NULL), ret, err);

	/* product = in_isorder * in_shortw */
	if(s != PUBLIC_PT){
		/*
		 * The point is not public: it is considered sensitive and
		 * the blinded scalar multiplication is used.
		 */
		ret = prj_pt_mul_blind(&product, in_isorder, in_shortw);
	}
	else{
		/* Public point: the naive scalar multiplication is good enough */
		ret = _prj_pt_unprotected_mult(&product, in_isorder, in_shortw);
	}
	EG(ret, err);

	/* The answer is whether the product is the point at infinity */
	ret = prj_pt_iszero(&product, &is_inf); EG(ret, err);
	(*check) = is_inf;

err:
	prj_pt_uninit(&product);

	return ret;
}

/*****************************************************************************/

/*
 * Map points from Edwards to short Weierstrass projective points through Montgomery (composition mapping).
 *     Point at infinity (0, 1) -> (0, 1, 0) is treated as an exception, which is trivially not constant time.
 *     This is OK since our mapping functions should be used at the non sensitive input and output
 *     interfaces.
 *
 * The function returns 0 on success, -1 on error.
 */
int aff_pt_edwards_to_prj_pt_shortw(aff_pt_edwards_src_t in_edwards,
				    ec_shortw_crv_src_t shortw_crv,
				    prj_pt_t out_shortw,
				    fp_src_t alpha_edwards)
{
	int ret, x_is_zero, y_vs_one;
	aff_pt shortw_aff;
	fp one;
	shortw_aff.magic = one.magic = WORD(0);

	/* The input point must be initialized and the two curves compatible */
	ret = aff_pt_edwards_check_initialized(in_edwards); EG(ret, err);
	ret = curve_edwards_shortw_check(in_edwards->crv, shortw_crv, alpha_edwards); EG(ret, err);

	/* Attach the output point to the short Weierstrass curve */
	ret = prj_pt_init(out_shortw, shortw_crv); EG(ret, err);

	/* Constant 1 in the field of the input point, for the test below */
	ret = fp_init(&one, in_edwards->x.ctx); EG(ret, err);
	ret = fp_one(&one); EG(ret, err);

	/*
	 * (0, 1) on Edwards is mapped to the point at infinity.
	 * This test induces a non constant time exception, which is fine as
	 * the current function must be called on public data anyways.
	 */
	ret = fp_iszero(&(in_edwards->x), &x_is_zero); EG(ret, err);
	ret = fp_cmp(&(in_edwards->y), &one, &y_vs_one); EG(ret, err);
	if(x_is_zero && (y_vs_one == 0)){
		ret = prj_pt_zero(out_shortw);
		goto err;
	}

	/* Regular case: affine mapping first ... */
	ret = aff_pt_edwards_to_shortw(in_edwards, shortw_crv, &shortw_aff, alpha_edwards); EG(ret, err);
	/* ... then short Weierstrass affine to projective coordinates */
	ret = ec_shortw_aff_to_prj(out_shortw, &shortw_aff);

err:
	aff_pt_uninit(&shortw_aff);
	fp_uninit(&one);

	return ret;
}

/*
 * Map points from short Weierstrass projective points to Edwards through Montgomery (composition mapping).
 *     Point at infinity with Z=0 (in projective coordinates) -> (0, 1) is treated as an exception, which is trivially not constant time.
 *     This is OK since our mapping functions should be used at the non sensitive input and output
 *     interfaces.
 *
 * The function returns 0 on success, -1 on error.
 */
int prj_pt_shortw_to_aff_pt_edwards(prj_pt_src_t in_shortw,
				    ec_edwards_crv_src_t edwards_crv,
				    aff_pt_edwards_t out_edwards,
				    fp_src_t alpha_edwards)
{
	int ret, is_inf;
	aff_pt shortw_aff;
	shortw_aff.magic = WORD(0);

	/* The input point must be initialized and the two curves compatible */
	ret = prj_pt_check_initialized(in_shortw); EG(ret, err);
	ret = curve_edwards_shortw_check(edwards_crv, in_shortw->crv, alpha_edwards); EG(ret, err);

	/* Temporary affine point on the short Weierstrass curve */
	ret = aff_pt_init(&shortw_aff, in_shortw->crv); EG(ret, err);

	/*
	 * The point at infinity is handled apart.
	 * This test induces a non constant time exception, which is fine as
	 * the current function must be called on public data anyways.
	 */
	ret = prj_pt_iszero(in_shortw, &is_inf); EG(ret, err);
	if(!is_inf){
		/* Regular case: projective to affine, then the affine mapping */
		ret = prj_pt_to_aff(&shortw_aff, in_shortw); EG(ret, err);
		ret = aff_pt_shortw_to_edwards(&shortw_aff, edwards_crv, out_edwards, alpha_edwards);
	}
	else{
		/* Point at infinity: the output is the Edwards point (0, 1) */
		fp zero, one;
		zero.magic = one.magic = WORD(0);

		ret = fp_init(&zero, in_shortw->X.ctx); EG(ret, err1);
		ret = fp_init(&one, in_shortw->X.ctx); EG(ret, err1);

		ret = fp_zero(&zero); EG(ret, err1);
		ret = fp_one(&one); EG(ret, err1);

		ret = aff_pt_edwards_init_from_coords(out_edwards, edwards_crv, &zero, &one);

err1:
		fp_uninit(&zero);
		fp_uninit(&one);
	}

err:
	aff_pt_uninit(&shortw_aff);

	return ret;
}

/*
 * Map points from Montgomery to short Weierstrass projective points.
 *
 * The function returns 0 on success, -1 on error.
 */
int aff_pt_montgomery_to_prj_pt_shortw(aff_pt_montgomery_src_t in_montgomery,
				       ec_shortw_crv_src_t shortw_crv,
				       prj_pt_t out_shortw)
{
	aff_pt shortw_aff;
	int ret;
	shortw_aff.magic = WORD(0);

	/* The input point must be initialized and the two curves compatible */
	ret = aff_pt_montgomery_check_initialized(in_montgomery); EG(ret, err);
	ret = curve_montgomery_shortw_check(in_montgomery->crv, shortw_crv); EG(ret, err);

	/* Attach the output point to the short Weierstrass curve */
	ret = prj_pt_init(out_shortw, shortw_crv); EG(ret, err);

	/* Affine mapping first ... */
	ret = aff_pt_montgomery_to_shortw(in_montgomery, shortw_crv, &shortw_aff); EG(ret, err);
	/* ... then short Weierstrass affine to projective coordinates */
	ret = ec_shortw_aff_to_prj(out_shortw, &shortw_aff);

err:
	aff_pt_uninit(&shortw_aff);

	return ret;
}

/*
 * Map points from short Weierstrass projective points to Montgomery.
 *
 * The function returns 0 on success, -1 on error.
 */
int prj_pt_shortw_to_aff_pt_montgomery(prj_pt_src_t in_shortw, ec_montgomery_crv_src_t montgomery_crv, aff_pt_montgomery_t out_montgomery)
{
	aff_pt shortw_aff;
	int ret;
	shortw_aff.magic = WORD(0);

	/* The input point must be initialized and the two curves compatible */
	ret = prj_pt_check_initialized(in_shortw); EG(ret, err);
	ret = curve_montgomery_shortw_check(montgomery_crv, in_shortw->crv); EG(ret, err);

	/* Temporary affine point on the short Weierstrass curve */
	ret = aff_pt_init(&shortw_aff, in_shortw->crv); EG(ret, err);

	/* Projective to affine on the short Weierstrass curve ... */
	ret = prj_pt_to_aff(&shortw_aff, in_shortw); EG(ret, err);
	/* ... then the affine mapping to Montgomery */
	ret = aff_pt_shortw_to_montgomery(&shortw_aff, montgomery_crv, out_montgomery);

err:
	aff_pt_uninit(&shortw_aff);

	return ret;
}