/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "libm_inlines.h"

#ifdef __RESTRICT
#define	restrict _Restrict
#else
#define	restrict
#endif

extern double sqrt(double);

/*
 * Instead of type punning, use union type.
 */
typedef union h32 {
	float f;
	unsigned u;
} h32;

void
__vhypotf(int n, float *restrict x, int stridex, float *restrict y,
    int stridey, float *restrict z, int stridez)
{
	float		x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2;
	h32		hx0, hx1, hx2, hy0, hy1, hy2;
	int		i, j0, j1, j2;

	do {
LOOP0:
		hx0.f = *x;
		hy0.f = *y;
		hx0.u &= ~0x80000000;
		hy0.u &= ~0x80000000;
		x0 = hx0.f;
		y0 = hy0.f;
		if (hy0.u > hx0.u) {
			i = hy0.u - hx0.u;
			j0 = hy0.u & 0x7f800000;
			if (hx0.u == 0)
				i = 0x7f800000;
		} else {
			i = hx0.u - hy0.u;
			j0 = hx0.u & 0x7f800000;
			if (hy0.u == 0)
				i = 0x7f800000;
			else if (hx0.u == 0)
				i = 0x7f800000;
		}
		if (i >= 0x0c800000 || j0 >= 0x7f800000) {
			z0 = x0 + y0;
			if (hx0.u == 0x7f800000)
				z0 = x0;
			else if (hy0.u  == 0x7f800000)
				z0 = y0;
			else if (hx0.u > 0x7f800000 || hy0.u > 0x7f800000)
				z0 = *x + *y;
			*z = z0;
			x += stridex;
			y += stridey;
			z += stridez;
			i = 0;
			if (--n <= 0)
				break;
			goto LOOP0;
		}
		pz0 = z;
		x += stridex;
		y += stridey;
		z += stridez;
		i = 1;
		if (--n <= 0)
			break;

LOOP1:
		hx1.f = *x;
		hy1.f = *y;
		hx1.u &= ~0x80000000;
		hy1.u &= ~0x80000000;
		x1 = hx1.f;
		y1 = hy1.f;
		if (hy1.u > hx1.u) {
			i = hy1.u - hx1.u;
			j1 = hy1.u & 0x7f800000;
			if (hx1.u == 0)
				i = 0x7f800000;
		} else {
			i = hx1.u - hy1.u;
			j1 = hx1.u & 0x7f800000;
			if (hy1.u == 0)
				i = 0x7f800000;
			else if (hx1.u == 0)
				i = 0x7f800000;
		}
		if (i >= 0x0c800000 || j1 >= 0x7f800000) {
			z1 = x1 + y1;
			if (hx1.u == 0x7f800000)
				z1 = x1;
			else if (hy1.u == 0x7f800000)
				z1 = y1;
			else if (hx1.u > 0x7f800000 || hy1.u > 0x7f800000)
				z1 = *x + *y;
			*z = z1;
			x += stridex;
			y += stridey;
			z += stridez;
			i = 1;
			if (--n <= 0)
				break;
			goto LOOP1;
		}
		pz1 = z;
		x += stridex;
		y += stridey;
		z += stridez;
		i = 2;
		if (--n <= 0)
			break;

LOOP2:
		hx2.f = *x;
		hy2.f = *y;
		hx2.u &= ~0x80000000;
		hy2.u &= ~0x80000000;
		x2 = hx2.f;
		y2 = hy2.f;
		if (hy2.u > hx2.u) {
			i = hy2.u - hx2.u;
			j2 = hy2.u & 0x7f800000;
			if (hx2.u == 0)
				i = 0x7f800000;
		} else {
			i = hx2.u - hy2.u;
			j2 = hx2.u & 0x7f800000;
			if (hy2.u == 0)
				i = 0x7f800000;
			else if (hx2.u == 0)
				i = 0x7f800000;
		}
		if (i >= 0x0c800000 || j2 >= 0x7f800000) {
			z2 = x2 + y2;
			if (hx2.u == 0x7f800000)
				z2 = x2;
			else if (hy2.u == 0x7f800000)
				z2 = y2;
			else if (hx2.u > 0x7f800000 || hy2.u > 0x7f800000)
				z2 = *x + *y;
			*z = z2;
			x += stridex;
			y += stridey;
			z += stridez;
			i = 2;
			if (--n <= 0)
				break;
			goto LOOP2;
		}
		pz2 = z;

		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
		z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
		z2 = sqrt(x2 * (double)x2 + y2 * (double)y2);
		*pz0 = z0;
		*pz1 = z1;
		*pz2 = z2;

		x += stridex;
		y += stridey;
		z += stridez;
		i = 0;
	} while (--n > 0);

	if (i > 0) {
		if (i > 1) {
			z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
			*pz1 = z1;
		}
		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
		*pz0 = z0;
	}
}