xref: /titanic_41/usr/src/lib/libsum/common/sum-att.c (revision 34f9b3eef6fdadbda0a846aa4d68691ac40eace5)
17c2fbfb3SApril Chin /***********************************************************************
27c2fbfb3SApril Chin *                                                                      *
37c2fbfb3SApril Chin *               This software is part of the ast package               *
4*34f9b3eeSRoland Mainz *          Copyright (c) 1996-2009 AT&T Intellectual Property          *
57c2fbfb3SApril Chin *                      and is licensed under the                       *
67c2fbfb3SApril Chin *                  Common Public License, Version 1.0                  *
77c2fbfb3SApril Chin *                    by AT&T Intellectual Property                     *
87c2fbfb3SApril Chin *                                                                      *
97c2fbfb3SApril Chin *                A copy of the License is available at                 *
107c2fbfb3SApril Chin *            http://www.opensource.org/licenses/cpl1.0.txt             *
117c2fbfb3SApril Chin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
127c2fbfb3SApril Chin *                                                                      *
137c2fbfb3SApril Chin *              Information and Software Systems Research               *
147c2fbfb3SApril Chin *                            AT&T Research                             *
157c2fbfb3SApril Chin *                           Florham Park NJ                            *
167c2fbfb3SApril Chin *                                                                      *
177c2fbfb3SApril Chin *                 Glenn Fowler <gsf@research.att.com>                  *
187c2fbfb3SApril Chin *                                                                      *
197c2fbfb3SApril Chin ***********************************************************************/
207c2fbfb3SApril Chin #pragma prototyped
217c2fbfb3SApril Chin 
227c2fbfb3SApril Chin /*
237c2fbfb3SApril Chin  * att
247c2fbfb3SApril Chin  */
257c2fbfb3SApril Chin 
/*
 * Method-table entries for the "att" checksum.
 *
 * att_description is the help text for the method.
 * att_match lists the names the method is selected by; presumably the
 * '|' characters separate alternatives -- confirm against the matcher.
 * att_options is 0; presumably this means the method takes no
 * method-specific options -- confirm against the table consumer.
 * att_open, att_init, att_print and att_data are plain aliases for the
 * long_* routines, which are defined outside this file.
 * att_scale is 512; presumably the unit in which the data size is
 * reported -- confirm against the code that consumes it.
 */
267c2fbfb3SApril Chin #define att_description	\
277c2fbfb3SApril Chin 	"The system 5 release 4 checksum. This is the default for \bsum\b \
287c2fbfb3SApril Chin 	when \bgetconf UNIVERSE\b is \batt\b. This is the only true sum; \
297c2fbfb3SApril Chin 	all of the other methods are order dependent."
307c2fbfb3SApril Chin #define att_options	0
317c2fbfb3SApril Chin #define att_match	"att|sys5|s5|default"
327c2fbfb3SApril Chin #define att_open	long_open
337c2fbfb3SApril Chin #define att_init	long_init
347c2fbfb3SApril Chin #define att_print	long_print
357c2fbfb3SApril Chin #define att_data	long_data
367c2fbfb3SApril Chin #define att_scale	512
377c2fbfb3SApril Chin 
38*34f9b3eeSRoland Mainz #if defined(__SUNPRO_C) || defined(__GNUC__)
39*34f9b3eeSRoland Mainz 
/*
 * sum_prefetch(addr): compiler-specific prefetch request for addr.
 *
 * With Sun's compiler it expands to sun_prefetch_read_many(); with GCC
 * it expands to __builtin_prefetch(addr, 0, 3), whose arguments request
 * a prefetch for reading (0) with the highest temporal locality (3).
 *
 * The #else/#error branch cannot be selected while the enclosing #if,
 * which already requires __SUNPRO_C or __GNUC__, is in effect.
 */
40*34f9b3eeSRoland Mainz #if defined(__SUNPRO_C)
41*34f9b3eeSRoland Mainz #    include <sun_prefetch.h>
42*34f9b3eeSRoland Mainz #    define sum_prefetch(addr) sun_prefetch_read_many((void *)(addr))
43*34f9b3eeSRoland Mainz #elif defined(__GNUC__)
44*34f9b3eeSRoland Mainz #    define sum_prefetch(addr) __builtin_prefetch((addr), 0, 3)
45*34f9b3eeSRoland Mainz #else
46*34f9b3eeSRoland Mainz #    error Unknown compiler
47*34f9b3eeSRoland Mainz #endif
48*34f9b3eeSRoland Mainz 
/*
 * CBLOCK_SIZE is the number of bytes att_block() sums per pass of its
 * outer loop and the distance it prefetches ahead; presumably chosen to
 * match a common cache line size -- confirm.
 *
 * NOTE(review): the unroll pragma is intended for the for() loop inside
 * att_block() (see the comment there); whether a pragma placed at file
 * scope reaches that loop is compiler dependent -- confirm.
 */
49*34f9b3eeSRoland Mainz #define CBLOCK_SIZE (64)
50*34f9b3eeSRoland Mainz #pragma unroll(16)
51*34f9b3eeSRoland Mainz 
52*34f9b3eeSRoland Mainz /* Inmos transputer would love this algorithm */
53*34f9b3eeSRoland Mainz static int
54*34f9b3eeSRoland Mainz att_block(register Sum_t* p, const void* s, size_t n)
55*34f9b3eeSRoland Mainz {
56*34f9b3eeSRoland Mainz 	register uint32_t	c = ((Integral_t*)p)->sum;
57*34f9b3eeSRoland Mainz 	register const unsigned char*	b = (const unsigned char*)s;
58*34f9b3eeSRoland Mainz 	register const unsigned char*	e = b + n;
59*34f9b3eeSRoland Mainz 	register uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
60*34f9b3eeSRoland Mainz 	register unsigned int i;
61*34f9b3eeSRoland Mainz 
62*34f9b3eeSRoland Mainz 	s0=s1=s2=s3=s4=s5=s6=s7=0U;
63*34f9b3eeSRoland Mainz 
64*34f9b3eeSRoland Mainz 	sum_prefetch((void *)b);
65*34f9b3eeSRoland Mainz 
66*34f9b3eeSRoland Mainz 	while (n > CBLOCK_SIZE)
67*34f9b3eeSRoland Mainz 	{
68*34f9b3eeSRoland Mainz 		sum_prefetch((b+CBLOCK_SIZE));
69*34f9b3eeSRoland Mainz 
70*34f9b3eeSRoland Mainz 		/* Compiler will unroll for() loops per #pragma unroll */
71*34f9b3eeSRoland Mainz 		for (i=0 ; i < (CBLOCK_SIZE/8) ; i++)
72*34f9b3eeSRoland Mainz 		{
73*34f9b3eeSRoland Mainz 			/*
74*34f9b3eeSRoland Mainz 			 * use s0-s7 to decouple calculations (this improves pipelining)
75*34f9b3eeSRoland Mainz 			 * because each operation is completely independent from it's
76*34f9b3eeSRoland Mainz 			 * siblings
77*34f9b3eeSRoland Mainz 			 */
78*34f9b3eeSRoland Mainz 			s0+=b[0];
79*34f9b3eeSRoland Mainz 			s1+=b[1];
80*34f9b3eeSRoland Mainz 			s2+=b[2];
81*34f9b3eeSRoland Mainz 			s3+=b[3];
82*34f9b3eeSRoland Mainz 			s4+=b[4];
83*34f9b3eeSRoland Mainz 			s5+=b[5];
84*34f9b3eeSRoland Mainz 			s6+=b[6];
85*34f9b3eeSRoland Mainz 			s7+=b[7];
86*34f9b3eeSRoland Mainz 
87*34f9b3eeSRoland Mainz 			b+=8;
88*34f9b3eeSRoland Mainz 			n-=8;
89*34f9b3eeSRoland Mainz 		}
90*34f9b3eeSRoland Mainz 	}
91*34f9b3eeSRoland Mainz 
92*34f9b3eeSRoland Mainz 	c+=s0+s1+s2+s3+s4+s5+s6+s7;
93*34f9b3eeSRoland Mainz 
94*34f9b3eeSRoland Mainz 	while (b < e)
95*34f9b3eeSRoland Mainz 		c += *b++;
96*34f9b3eeSRoland Mainz 	((Integral_t*)p)->sum = c;
97*34f9b3eeSRoland Mainz 	return 0;
98*34f9b3eeSRoland Mainz }
99*34f9b3eeSRoland Mainz 
100*34f9b3eeSRoland Mainz #else
1017c2fbfb3SApril Chin static int
1027c2fbfb3SApril Chin att_block(register Sum_t* p, const void* s, size_t n)
1037c2fbfb3SApril Chin {
1047c2fbfb3SApril Chin 	register uint32_t	c = ((Integral_t*)p)->sum;
1057c2fbfb3SApril Chin 	register unsigned char*	b = (unsigned char*)s;
1067c2fbfb3SApril Chin 	register unsigned char*	e = b + n;
1077c2fbfb3SApril Chin 
1087c2fbfb3SApril Chin 	while (b < e)
1097c2fbfb3SApril Chin 		c += *b++;
1107c2fbfb3SApril Chin 	((Integral_t*)p)->sum = c;
1117c2fbfb3SApril Chin 	return 0;
1127c2fbfb3SApril Chin }
113*34f9b3eeSRoland Mainz #endif /* defined(__SUNPRO_C) || defined(__GNUC__) */
1147c2fbfb3SApril Chin 
1157c2fbfb3SApril Chin static int
1167c2fbfb3SApril Chin att_done(Sum_t* p)
1177c2fbfb3SApril Chin {
1187c2fbfb3SApril Chin 	register uint32_t	c = ((Integral_t*)p)->sum;
1197c2fbfb3SApril Chin 
1207c2fbfb3SApril Chin 	c = (c & 0xffff) + ((c >> 16) & 0xffff);
1217c2fbfb3SApril Chin 	c = (c & 0xffff) + (c >> 16);
1227c2fbfb3SApril Chin 	((Integral_t*)p)->sum = c & 0xffff;
1237c2fbfb3SApril Chin 	return short_done(p);
1247c2fbfb3SApril Chin }
125