17c2fbfb3SApril Chin /*********************************************************************** 27c2fbfb3SApril Chin * * 37c2fbfb3SApril Chin * This software is part of the ast package * 4*34f9b3eeSRoland Mainz * Copyright (c) 1996-2009 AT&T Intellectual Property * 57c2fbfb3SApril Chin * and is licensed under the * 67c2fbfb3SApril Chin * Common Public License, Version 1.0 * 77c2fbfb3SApril Chin * by AT&T Intellectual Property * 87c2fbfb3SApril Chin * * 97c2fbfb3SApril Chin * A copy of the License is available at * 107c2fbfb3SApril Chin * http://www.opensource.org/licenses/cpl1.0.txt * 117c2fbfb3SApril Chin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 127c2fbfb3SApril Chin * * 137c2fbfb3SApril Chin * Information and Software Systems Research * 147c2fbfb3SApril Chin * AT&T Research * 157c2fbfb3SApril Chin * Florham Park NJ * 167c2fbfb3SApril Chin * * 177c2fbfb3SApril Chin * Glenn Fowler <gsf@research.att.com> * 187c2fbfb3SApril Chin * * 197c2fbfb3SApril Chin ***********************************************************************/ 207c2fbfb3SApril Chin #pragma prototyped 217c2fbfb3SApril Chin 227c2fbfb3SApril Chin /* 237c2fbfb3SApril Chin * att 247c2fbfb3SApril Chin */ 257c2fbfb3SApril Chin 267c2fbfb3SApril Chin #define att_description \ 277c2fbfb3SApril Chin "The system 5 release 4 checksum. This is the default for \bsum\b \ 287c2fbfb3SApril Chin when \bgetconf UNIVERSE\b is \batt\b. This is the only true sum; \ 297c2fbfb3SApril Chin all of the other methods are order dependent." 307c2fbfb3SApril Chin #define att_options 0 317c2fbfb3SApril Chin #define att_match "att|sys5|s5|default" 327c2fbfb3SApril Chin #define att_open long_open 337c2fbfb3SApril Chin #define att_init long_init 347c2fbfb3SApril Chin #define att_print long_print 357c2fbfb3SApril Chin #define att_data long_data 367c2fbfb3SApril Chin #define att_scale 512 377c2fbfb3SApril Chin 38*34f9b3eeSRoland Mainz #if defined(__SUNPRO_C) || defined(__GNUC__) 39*34f9b3eeSRoland Mainz 40*34f9b3eeSRoland Mainz #if defined(__SUNPRO_C) 41*34f9b3eeSRoland Mainz # include <sun_prefetch.h> 42*34f9b3eeSRoland Mainz # define sum_prefetch(addr) sun_prefetch_read_many((void *)(addr)) 43*34f9b3eeSRoland Mainz #elif defined(__GNUC__) 44*34f9b3eeSRoland Mainz # define sum_prefetch(addr) __builtin_prefetch((addr), 0, 3) 45*34f9b3eeSRoland Mainz #else 46*34f9b3eeSRoland Mainz # error Unknown compiler 47*34f9b3eeSRoland Mainz #endif 48*34f9b3eeSRoland Mainz 49*34f9b3eeSRoland Mainz #define CBLOCK_SIZE (64) 50*34f9b3eeSRoland Mainz #pragma unroll(16) 51*34f9b3eeSRoland Mainz 52*34f9b3eeSRoland Mainz /* Inmos transputer would love this algorithm */ 53*34f9b3eeSRoland Mainz static int 54*34f9b3eeSRoland Mainz att_block(register Sum_t* p, const void* s, size_t n) 55*34f9b3eeSRoland Mainz { 56*34f9b3eeSRoland Mainz register uint32_t c = ((Integral_t*)p)->sum; 57*34f9b3eeSRoland Mainz register const unsigned char* b = (const unsigned char*)s; 58*34f9b3eeSRoland Mainz register const unsigned char* e = b + n; 59*34f9b3eeSRoland Mainz register uint32_t s0, s1, s2, s3, s4, s5, s6, s7; 60*34f9b3eeSRoland Mainz register unsigned int i; 61*34f9b3eeSRoland Mainz 62*34f9b3eeSRoland Mainz s0=s1=s2=s3=s4=s5=s6=s7=0U; 63*34f9b3eeSRoland Mainz 64*34f9b3eeSRoland Mainz sum_prefetch((void *)b); 65*34f9b3eeSRoland Mainz 66*34f9b3eeSRoland Mainz while (n > CBLOCK_SIZE) 67*34f9b3eeSRoland Mainz { 68*34f9b3eeSRoland Mainz sum_prefetch((b+CBLOCK_SIZE)); 69*34f9b3eeSRoland Mainz 70*34f9b3eeSRoland Mainz /* Compiler will unroll for() loops per #pragma unroll */ 71*34f9b3eeSRoland Mainz for (i=0 ; i < (CBLOCK_SIZE/8) ; i++) 72*34f9b3eeSRoland Mainz { 73*34f9b3eeSRoland Mainz /* 74*34f9b3eeSRoland Mainz * use s0-s7 to decouple calculations (this improves pipelining) 75*34f9b3eeSRoland Mainz * because each operation is completely independent from it's 76*34f9b3eeSRoland Mainz * siblings 77*34f9b3eeSRoland Mainz */ 78*34f9b3eeSRoland Mainz s0+=b[0]; 79*34f9b3eeSRoland Mainz s1+=b[1]; 80*34f9b3eeSRoland Mainz s2+=b[2]; 81*34f9b3eeSRoland Mainz s3+=b[3]; 82*34f9b3eeSRoland Mainz s4+=b[4]; 83*34f9b3eeSRoland Mainz s5+=b[5]; 84*34f9b3eeSRoland Mainz s6+=b[6]; 85*34f9b3eeSRoland Mainz s7+=b[7]; 86*34f9b3eeSRoland Mainz 87*34f9b3eeSRoland Mainz b+=8; 88*34f9b3eeSRoland Mainz n-=8; 89*34f9b3eeSRoland Mainz } 90*34f9b3eeSRoland Mainz } 91*34f9b3eeSRoland Mainz 92*34f9b3eeSRoland Mainz c+=s0+s1+s2+s3+s4+s5+s6+s7; 93*34f9b3eeSRoland Mainz 94*34f9b3eeSRoland Mainz while (b < e) 95*34f9b3eeSRoland Mainz c += *b++; 96*34f9b3eeSRoland Mainz ((Integral_t*)p)->sum = c; 97*34f9b3eeSRoland Mainz return 0; 98*34f9b3eeSRoland Mainz } 99*34f9b3eeSRoland Mainz 100*34f9b3eeSRoland Mainz #else 1017c2fbfb3SApril Chin static int 1027c2fbfb3SApril Chin att_block(register Sum_t* p, const void* s, size_t n) 1037c2fbfb3SApril Chin { 1047c2fbfb3SApril Chin register uint32_t c = ((Integral_t*)p)->sum; 1057c2fbfb3SApril Chin register unsigned char* b = (unsigned char*)s; 1067c2fbfb3SApril Chin register unsigned char* e = b + n; 1077c2fbfb3SApril Chin 1087c2fbfb3SApril Chin while (b < e) 1097c2fbfb3SApril Chin c += *b++; 1107c2fbfb3SApril Chin ((Integral_t*)p)->sum = c; 1117c2fbfb3SApril Chin return 0; 1127c2fbfb3SApril Chin } 113*34f9b3eeSRoland Mainz #endif /* defined(__SUNPRO_C) || defined(__GNUC__) */ 1147c2fbfb3SApril Chin 1157c2fbfb3SApril Chin static int 1167c2fbfb3SApril Chin att_done(Sum_t* p) 1177c2fbfb3SApril Chin { 1187c2fbfb3SApril Chin register uint32_t c = ((Integral_t*)p)->sum; 1197c2fbfb3SApril Chin 1207c2fbfb3SApril Chin c = (c & 0xffff) + ((c >> 16) & 0xffff); 1217c2fbfb3SApril Chin c = (c & 0xffff) + (c >> 16); 1227c2fbfb3SApril Chin ((Integral_t*)p)->sum = c & 0xffff; 1237c2fbfb3SApril Chin return short_done(p); 1247c2fbfb3SApril Chin } 125