xref: /titanic_50/usr/src/lib/libsum/common/sum-att.c (revision f38cb554a534c6df738be3f4d23327e69888e634)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1996-2010 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 
22 /*
23  * att
24  */
25 
/*
 * Method table entries for the "att" checksum.
 *
 * att_description is the descriptive text for the method. The \b escapes
 * are C backspace characters wrapped around some words -- presumably
 * markup consumed by the usage/help formatter; TODO confirm.
 */
#define att_description	\
	"The system 5 release 4 checksum. This is the default for \bsum\b \
	when \bgetconf UNIVERSE\b is \batt\b. This is the only true sum; \
	all of the other methods are order dependent."
/* no method-specific options */
#define att_options	0
/* '|'-separated names for this method (presumably matched against the requested method name -- verify against the lookup code) */
#define att_match	"att|sys5|s5|default"
/* open/init/print/data are not implemented here; they alias the long_* routines defined elsewhere */
#define att_open	long_open
#define att_init	long_init
#define att_print	long_print
#define att_data	long_data
/* NOTE(review): looks like the block-count unit (512 bytes) used when reporting sizes -- confirm */
#define att_scale	512
37 
/*
 * Compilers with a known prefetch primitive get the chunked att_block()
 * variant; this conditional remains open past these definitions and is
 * closed after the fallback att_block().
 */
#if defined(__SUNPRO_C) || defined(__GNUC__)

/* sum_prefetch(addr): compiler-specific prefetch hint for the data at addr */
#if defined(__SUNPRO_C)
#    include <sun_prefetch.h>
#    define sum_prefetch(addr) sun_prefetch_read_many((void *)(addr))
#elif defined(__GNUC__)
/* __builtin_prefetch arguments: 0 = prefetch for reading, 3 = keep in all cache levels */
#    define sum_prefetch(addr) __builtin_prefetch((addr), 0, 3)
#else
/* not reachable: the enclosing #if already requires one of the two compilers */
#    error Unknown compiler
#endif

/* att_block() keeps using its multi-accumulator loop while more than this many bytes remain */
#define CBLOCK_SIZE (64)
/*
 * Unroll request for the loops that follow.
 * NOTE(review): whether each supported compiler honors this spelling of
 * the pragma cannot be told from this file -- confirm.
 */
#pragma unroll(16)
51 
52 /* Inmos transputer would love this algorithm */
53 static int
54 att_block(register Sum_t* p, const void* s, size_t n)
55 {
56 	register uint32_t	c = ((Integral_t*)p)->sum;
57 	register const unsigned char*	b = (const unsigned char*)s;
58 	register const unsigned char*	e = b + n;
59 	register uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
60 	register unsigned int i;
61 
62 	s0=s1=s2=s3=s4=s5=s6=s7=0U;
63 
64 	sum_prefetch((void *)b);
65 
66 	while (n > CBLOCK_SIZE)
67 	{
68 		sum_prefetch((b+CBLOCK_SIZE));
69 
70 		/* Compiler will unroll for() loops per #pragma unroll */
71 		for (i=0 ; i < (CBLOCK_SIZE/8) ; i++)
72 		{
73 			/*
74 			 * use s0-s7 to decouple calculations (this improves pipelining)
75 			 * because each operation is completely independent from it's
76 			 * siblings
77 			 */
78 			s0+=b[0];
79 			s1+=b[1];
80 			s2+=b[2];
81 			s3+=b[3];
82 			s4+=b[4];
83 			s5+=b[5];
84 			s6+=b[6];
85 			s7+=b[7];
86 
87 			b+=8;
88 			n-=8;
89 		}
90 	}
91 
92 	c+=s0+s1+s2+s3+s4+s5+s6+s7;
93 
94 	while (b < e)
95 		c += *b++;
96 	((Integral_t*)p)->sum = c;
97 	return 0;
98 }
99 
100 #else
101 static int
102 att_block(register Sum_t* p, const void* s, size_t n)
103 {
104 	register uint32_t	c = ((Integral_t*)p)->sum;
105 	register unsigned char*	b = (unsigned char*)s;
106 	register unsigned char*	e = b + n;
107 
108 	while (b < e)
109 		c += *b++;
110 	((Integral_t*)p)->sum = c;
111 	return 0;
112 }
113 #endif /* defined(__SUNPRO_C) || defined(__GNUC__) */
114 
115 static int
116 att_done(Sum_t* p)
117 {
118 	register uint32_t	c = ((Integral_t*)p)->sum;
119 
120 	c = (c & 0xffff) + ((c >> 16) & 0xffff);
121 	c = (c & 0xffff) + (c >> 16);
122 	((Integral_t*)p)->sum = c & 0xffff;
123 	return short_done(p);
124 }
125