17c2fbfb3SApril Chin /***********************************************************************
27c2fbfb3SApril Chin * *
37c2fbfb3SApril Chin * This software is part of the ast package *
4*3e14f97fSRoger A. Faulkner * Copyright (c) 1996-2010 AT&T Intellectual Property *
57c2fbfb3SApril Chin * and is licensed under the *
67c2fbfb3SApril Chin * Common Public License, Version 1.0 *
77c2fbfb3SApril Chin * by AT&T Intellectual Property *
87c2fbfb3SApril Chin * *
97c2fbfb3SApril Chin * A copy of the License is available at *
107c2fbfb3SApril Chin * http://www.opensource.org/licenses/cpl1.0.txt *
117c2fbfb3SApril Chin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
127c2fbfb3SApril Chin * *
137c2fbfb3SApril Chin * Information and Software Systems Research *
147c2fbfb3SApril Chin * AT&T Research *
157c2fbfb3SApril Chin * Florham Park NJ *
167c2fbfb3SApril Chin * *
177c2fbfb3SApril Chin * Glenn Fowler <gsf@research.att.com> *
187c2fbfb3SApril Chin * *
197c2fbfb3SApril Chin ***********************************************************************/
207c2fbfb3SApril Chin #pragma prototyped
217c2fbfb3SApril Chin
227c2fbfb3SApril Chin /*
237c2fbfb3SApril Chin * att
247c2fbfb3SApril Chin */
257c2fbfb3SApril Chin
/*
 * att method table entries
 *
 * the open, init, print and data hooks are not implemented here;
 * they are aliases for the shared long_* routines
 */

#define att_description \
"The system 5 release 4 checksum. This is the default for \bsum\b \
when \bgetconf UNIVERSE\b is \batt\b. This is the only true sum; \
all of the other methods are order dependent."
#define att_options	0
#define att_match	"att|sys5|s5|default"

#define att_open	long_open
#define att_init	long_init
#define att_print	long_print
#define att_data	long_data

/* NOTE(review): presumably the block size used when reporting sizes -- confirm */
#define att_scale	512
377c2fbfb3SApril Chin
3834f9b3eeSRoland Mainz #if defined(__SUNPRO_C) || defined(__GNUC__)
3934f9b3eeSRoland Mainz
4034f9b3eeSRoland Mainz #if defined(__SUNPRO_C)
4134f9b3eeSRoland Mainz # include <sun_prefetch.h>
4234f9b3eeSRoland Mainz # define sum_prefetch(addr) sun_prefetch_read_many((void *)(addr))
4334f9b3eeSRoland Mainz #elif defined(__GNUC__)
4434f9b3eeSRoland Mainz # define sum_prefetch(addr) __builtin_prefetch((addr), 0, 3)
4534f9b3eeSRoland Mainz #else
4634f9b3eeSRoland Mainz # error Unknown compiler
4734f9b3eeSRoland Mainz #endif
4834f9b3eeSRoland Mainz
4934f9b3eeSRoland Mainz #define CBLOCK_SIZE (64)
5034f9b3eeSRoland Mainz #pragma unroll(16)
5134f9b3eeSRoland Mainz
5234f9b3eeSRoland Mainz /* Inmos transputer would love this algorithm */
5334f9b3eeSRoland Mainz static int
att_block(register Sum_t * p,const void * s,size_t n)5434f9b3eeSRoland Mainz att_block(register Sum_t* p, const void* s, size_t n)
5534f9b3eeSRoland Mainz {
5634f9b3eeSRoland Mainz register uint32_t c = ((Integral_t*)p)->sum;
5734f9b3eeSRoland Mainz register const unsigned char* b = (const unsigned char*)s;
5834f9b3eeSRoland Mainz register const unsigned char* e = b + n;
5934f9b3eeSRoland Mainz register uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
6034f9b3eeSRoland Mainz register unsigned int i;
6134f9b3eeSRoland Mainz
6234f9b3eeSRoland Mainz s0=s1=s2=s3=s4=s5=s6=s7=0U;
6334f9b3eeSRoland Mainz
6434f9b3eeSRoland Mainz sum_prefetch((void *)b);
6534f9b3eeSRoland Mainz
6634f9b3eeSRoland Mainz while (n > CBLOCK_SIZE)
6734f9b3eeSRoland Mainz {
6834f9b3eeSRoland Mainz sum_prefetch((b+CBLOCK_SIZE));
6934f9b3eeSRoland Mainz
7034f9b3eeSRoland Mainz /* Compiler will unroll for() loops per #pragma unroll */
7134f9b3eeSRoland Mainz for (i=0 ; i < (CBLOCK_SIZE/8) ; i++)
7234f9b3eeSRoland Mainz {
7334f9b3eeSRoland Mainz /*
7434f9b3eeSRoland Mainz * use s0-s7 to decouple calculations (this improves pipelining)
7534f9b3eeSRoland Mainz * because each operation is completely independent from it's
7634f9b3eeSRoland Mainz * siblings
7734f9b3eeSRoland Mainz */
7834f9b3eeSRoland Mainz s0+=b[0];
7934f9b3eeSRoland Mainz s1+=b[1];
8034f9b3eeSRoland Mainz s2+=b[2];
8134f9b3eeSRoland Mainz s3+=b[3];
8234f9b3eeSRoland Mainz s4+=b[4];
8334f9b3eeSRoland Mainz s5+=b[5];
8434f9b3eeSRoland Mainz s6+=b[6];
8534f9b3eeSRoland Mainz s7+=b[7];
8634f9b3eeSRoland Mainz
8734f9b3eeSRoland Mainz b+=8;
8834f9b3eeSRoland Mainz n-=8;
8934f9b3eeSRoland Mainz }
9034f9b3eeSRoland Mainz }
9134f9b3eeSRoland Mainz
9234f9b3eeSRoland Mainz c+=s0+s1+s2+s3+s4+s5+s6+s7;
9334f9b3eeSRoland Mainz
9434f9b3eeSRoland Mainz while (b < e)
9534f9b3eeSRoland Mainz c += *b++;
9634f9b3eeSRoland Mainz ((Integral_t*)p)->sum = c;
9734f9b3eeSRoland Mainz return 0;
9834f9b3eeSRoland Mainz }
9934f9b3eeSRoland Mainz
10034f9b3eeSRoland Mainz #else
1017c2fbfb3SApril Chin static int
att_block(register Sum_t * p,const void * s,size_t n)1027c2fbfb3SApril Chin att_block(register Sum_t* p, const void* s, size_t n)
1037c2fbfb3SApril Chin {
1047c2fbfb3SApril Chin register uint32_t c = ((Integral_t*)p)->sum;
1057c2fbfb3SApril Chin register unsigned char* b = (unsigned char*)s;
1067c2fbfb3SApril Chin register unsigned char* e = b + n;
1077c2fbfb3SApril Chin
1087c2fbfb3SApril Chin while (b < e)
1097c2fbfb3SApril Chin c += *b++;
1107c2fbfb3SApril Chin ((Integral_t*)p)->sum = c;
1117c2fbfb3SApril Chin return 0;
1127c2fbfb3SApril Chin }
11334f9b3eeSRoland Mainz #endif /* defined(__SUNPRO_C) || defined(__GNUC__) */
1147c2fbfb3SApril Chin
1157c2fbfb3SApril Chin static int
att_done(Sum_t * p)1167c2fbfb3SApril Chin att_done(Sum_t* p)
1177c2fbfb3SApril Chin {
1187c2fbfb3SApril Chin register uint32_t c = ((Integral_t*)p)->sum;
1197c2fbfb3SApril Chin
1207c2fbfb3SApril Chin c = (c & 0xffff) + ((c >> 16) & 0xffff);
1217c2fbfb3SApril Chin c = (c & 0xffff) + (c >> 16);
1227c2fbfb3SApril Chin ((Integral_t*)p)->sum = c & 0xffff;
1237c2fbfb3SApril Chin return short_done(p);
1247c2fbfb3SApril Chin }
125