/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1996-2011 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                                                                      *
***********************************************************************/
#pragma prototyped

/*
 * att
 */

/*
 * Human-readable description of the att checksum method.
 * The text is one string literal spread over three source lines with
 * backslash-newline continuations, so the whitespace that starts each
 * continued line is part of the string.
 * NOTE(review): the \b pairs are kept verbatim; presumably they are
 * markup for the usage formatter -- confirm against the consumer.
 */
#define att_description \
	"The system 5 release 4 checksum. This is the default for \bsum\b \
	when \bgetconf UNIVERSE\b is \batt\b. This is the only true sum; \
	all of the other methods are order dependent."
30*b4dd7d09SAndy Fiddaman #define att_options 0 31*b4dd7d09SAndy Fiddaman #define att_match "att|sys5|s5|default" 32*b4dd7d09SAndy Fiddaman #define att_open long_open 33*b4dd7d09SAndy Fiddaman #define att_init long_init 34*b4dd7d09SAndy Fiddaman #define att_print long_print 35*b4dd7d09SAndy Fiddaman #define att_data long_data 36*b4dd7d09SAndy Fiddaman #define att_scale 512 37*b4dd7d09SAndy Fiddaman 38*b4dd7d09SAndy Fiddaman #if defined(__SUNPRO_C) || defined(__GNUC__) 39*b4dd7d09SAndy Fiddaman 40*b4dd7d09SAndy Fiddaman #if defined(__SUNPRO_C) 41*b4dd7d09SAndy Fiddaman # include <sun_prefetch.h> 42*b4dd7d09SAndy Fiddaman # define sum_prefetch(addr) sun_prefetch_read_many((void *)(addr)) 43*b4dd7d09SAndy Fiddaman #elif defined(__GNUC__) 44*b4dd7d09SAndy Fiddaman # define sum_prefetch(addr) __builtin_prefetch((addr), 0, 3) 45*b4dd7d09SAndy Fiddaman #else 46*b4dd7d09SAndy Fiddaman # error Unknown compiler 47*b4dd7d09SAndy Fiddaman #endif 48*b4dd7d09SAndy Fiddaman 49*b4dd7d09SAndy Fiddaman #define CBLOCK_SIZE (64) 50*b4dd7d09SAndy Fiddaman #pragma unroll(16) 51*b4dd7d09SAndy Fiddaman 52*b4dd7d09SAndy Fiddaman /* Inmos transputer would love this algorithm */ 53*b4dd7d09SAndy Fiddaman static int 54*b4dd7d09SAndy Fiddaman att_block(register Sum_t* p, const void* s, size_t n) 55*b4dd7d09SAndy Fiddaman { 56*b4dd7d09SAndy Fiddaman register uint32_t c = ((Integral_t*)p)->sum; 57*b4dd7d09SAndy Fiddaman register const unsigned char* b = (const unsigned char*)s; 58*b4dd7d09SAndy Fiddaman register const unsigned char* e = b + n; 59*b4dd7d09SAndy Fiddaman register uint32_t s0, s1, s2, s3, s4, s5, s6, s7; 60*b4dd7d09SAndy Fiddaman register unsigned int i; 61*b4dd7d09SAndy Fiddaman 62*b4dd7d09SAndy Fiddaman s0=s1=s2=s3=s4=s5=s6=s7=0U; 63*b4dd7d09SAndy Fiddaman 64*b4dd7d09SAndy Fiddaman sum_prefetch((void *)b); 65*b4dd7d09SAndy Fiddaman 66*b4dd7d09SAndy Fiddaman while (n > CBLOCK_SIZE) 67*b4dd7d09SAndy Fiddaman { 68*b4dd7d09SAndy Fiddaman sum_prefetch((b+CBLOCK_SIZE)); 
69*b4dd7d09SAndy Fiddaman 70*b4dd7d09SAndy Fiddaman /* Compiler will unroll for() loops per #pragma unroll */ 71*b4dd7d09SAndy Fiddaman for (i=0 ; i < (CBLOCK_SIZE/8) ; i++) 72*b4dd7d09SAndy Fiddaman { 73*b4dd7d09SAndy Fiddaman /* 74*b4dd7d09SAndy Fiddaman * use s0-s7 to decouple calculations (this improves pipelining) 75*b4dd7d09SAndy Fiddaman * because each operation is completely independent from it's 76*b4dd7d09SAndy Fiddaman * siblings 77*b4dd7d09SAndy Fiddaman */ 78*b4dd7d09SAndy Fiddaman s0+=b[0]; 79*b4dd7d09SAndy Fiddaman s1+=b[1]; 80*b4dd7d09SAndy Fiddaman s2+=b[2]; 81*b4dd7d09SAndy Fiddaman s3+=b[3]; 82*b4dd7d09SAndy Fiddaman s4+=b[4]; 83*b4dd7d09SAndy Fiddaman s5+=b[5]; 84*b4dd7d09SAndy Fiddaman s6+=b[6]; 85*b4dd7d09SAndy Fiddaman s7+=b[7]; 86*b4dd7d09SAndy Fiddaman 87*b4dd7d09SAndy Fiddaman b+=8; 88*b4dd7d09SAndy Fiddaman n-=8; 89*b4dd7d09SAndy Fiddaman } 90*b4dd7d09SAndy Fiddaman } 91*b4dd7d09SAndy Fiddaman 92*b4dd7d09SAndy Fiddaman c+=s0+s1+s2+s3+s4+s5+s6+s7; 93*b4dd7d09SAndy Fiddaman 94*b4dd7d09SAndy Fiddaman while (b < e) 95*b4dd7d09SAndy Fiddaman c += *b++; 96*b4dd7d09SAndy Fiddaman ((Integral_t*)p)->sum = c; 97*b4dd7d09SAndy Fiddaman return 0; 98*b4dd7d09SAndy Fiddaman } 99*b4dd7d09SAndy Fiddaman 100*b4dd7d09SAndy Fiddaman #else 101*b4dd7d09SAndy Fiddaman static int 102*b4dd7d09SAndy Fiddaman att_block(register Sum_t* p, const void* s, size_t n) 103*b4dd7d09SAndy Fiddaman { 104*b4dd7d09SAndy Fiddaman register uint32_t c = ((Integral_t*)p)->sum; 105*b4dd7d09SAndy Fiddaman register unsigned char* b = (unsigned char*)s; 106*b4dd7d09SAndy Fiddaman register unsigned char* e = b + n; 107*b4dd7d09SAndy Fiddaman 108*b4dd7d09SAndy Fiddaman while (b < e) 109*b4dd7d09SAndy Fiddaman c += *b++; 110*b4dd7d09SAndy Fiddaman ((Integral_t*)p)->sum = c; 111*b4dd7d09SAndy Fiddaman return 0; 112*b4dd7d09SAndy Fiddaman } 113*b4dd7d09SAndy Fiddaman #endif /* defined(__SUNPRO_C) || defined(__GNUC__) */ 114*b4dd7d09SAndy Fiddaman 115*b4dd7d09SAndy Fiddaman static int 
116*b4dd7d09SAndy Fiddaman att_done(Sum_t* p) 117*b4dd7d09SAndy Fiddaman { 118*b4dd7d09SAndy Fiddaman register uint32_t c = ((Integral_t*)p)->sum; 119*b4dd7d09SAndy Fiddaman 120*b4dd7d09SAndy Fiddaman c = (c & 0xffff) + ((c >> 16) & 0xffff); 121*b4dd7d09SAndy Fiddaman c = (c & 0xffff) + (c >> 16); 122*b4dd7d09SAndy Fiddaman ((Integral_t*)p)->sum = c & 0xffff; 123*b4dd7d09SAndy Fiddaman return short_done(p); 124*b4dd7d09SAndy Fiddaman } 125