1*5d9d9091SRichard Lowe/* 2*5d9d9091SRichard Lowe * CDDL HEADER START 3*5d9d9091SRichard Lowe * 4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the 5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License"). 6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License. 7*5d9d9091SRichard Lowe * 8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing. 10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions 11*5d9d9091SRichard Lowe * and limitations under the License. 12*5d9d9091SRichard Lowe * 13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each 14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the 16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying 17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner] 18*5d9d9091SRichard Lowe * 19*5d9d9091SRichard Lowe * CDDL HEADER END 20*5d9d9091SRichard Lowe */ 21*5d9d9091SRichard Lowe 22*5d9d9091SRichard Lowe/* 23*5d9d9091SRichard Lowe * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24*5d9d9091SRichard Lowe */ 25*5d9d9091SRichard Lowe 26*5d9d9091SRichard Lowe .file "memcmp.s" 27*5d9d9091SRichard Lowe 28*5d9d9091SRichard Lowe/* 29*5d9d9091SRichard Lowe * memcmp(s1, s2, len) 30*5d9d9091SRichard Lowe * 31*5d9d9091SRichard Lowe * Compare n bytes: s1>s2: >0 s1==s2: 0 s1<s2: <0 32*5d9d9091SRichard Lowe * 33*5d9d9091SRichard Lowe * Fast assembler language version of the following C-program for memcmp 34*5d9d9091SRichard Lowe * which represents the `standard' for the C-library. 
35*5d9d9091SRichard Lowe * 36*5d9d9091SRichard Lowe * int 37*5d9d9091SRichard Lowe * memcmp(const void *s1, const void *s2, size_t n) 38*5d9d9091SRichard Lowe * { 39*5d9d9091SRichard Lowe * if (s1 != s2 && n != 0) { 40*5d9d9091SRichard Lowe * const char *ps1 = s1; 41*5d9d9091SRichard Lowe * const char *ps2 = s2; 42*5d9d9091SRichard Lowe * do { 43*5d9d9091SRichard Lowe * if (*ps1++ != *ps2++) 44*5d9d9091SRichard Lowe * return(ps1[-1] - ps2[-1]); 45*5d9d9091SRichard Lowe * } while (--n != 0); 46*5d9d9091SRichard Lowe * } 47*5d9d9091SRichard Lowe * return (0); 48*5d9d9091SRichard Lowe * } 49*5d9d9091SRichard Lowe */ 50*5d9d9091SRichard Lowe 51*5d9d9091SRichard Lowe#include <sys/asm_linkage.h> 52*5d9d9091SRichard Lowe#include <sys/sun4asi.h> 53*5d9d9091SRichard Lowe 54*5d9d9091SRichard Lowe ANSI_PRAGMA_WEAK(memcmp,function) 55*5d9d9091SRichard Lowe 56*5d9d9091SRichard Lowe ENTRY(memcmp) 57*5d9d9091SRichard Lowe cmp %o0, %o1 ! s1 == s2? 58*5d9d9091SRichard Lowe be %ncc, .cmpeq 59*5d9d9091SRichard Lowe 60*5d9d9091SRichard Lowe ! for small counts byte compare immediately 61*5d9d9091SRichard Lowe cmp %o2, 48 62*5d9d9091SRichard Lowe bleu,a %ncc, .bytcmp 63*5d9d9091SRichard Lowe mov %o2, %o3 ! o3 <= 48 64*5d9d9091SRichard Lowe 65*5d9d9091SRichard Lowe ! Count > 48. We will byte compare (8 + num of bytes to dbl align) 66*5d9d9091SRichard Lowe ! bytes. We assume that most miscompares will occur in the 1st 8 bytes 67*5d9d9091SRichard Lowe 68*5d9d9091SRichard Lowe.chkdbl: 69*5d9d9091SRichard Lowe and %o0, 7, %o4 ! is s1 aligned on a 8 byte bound 70*5d9d9091SRichard Lowe mov 8, %o3 ! o2 > 48; o3 = 8 71*5d9d9091SRichard Lowe sub %o4, 8, %o4 ! o4 = -(num of bytes to dbl align) 72*5d9d9091SRichard Lowe ba %ncc, .bytcmp 73*5d9d9091SRichard Lowe sub %o3, %o4, %o3 ! o3 = 8 + (num of bytes to dbl align) 74*5d9d9091SRichard Lowe 75*5d9d9091SRichard Lowe 76*5d9d9091SRichard Lowe1: ldub [%o1], %o5 ! 
byte compare loop 77*5d9d9091SRichard Lowe	inc	%o1 78*5d9d9091SRichard Lowe	inc	%o0 79*5d9d9091SRichard Lowe	dec	%o2 80*5d9d9091SRichard Lowe	cmp	%o4, %o5 81*5d9d9091SRichard Lowe	bne	%ncc, .noteq 82*5d9d9091SRichard Lowe.bytcmp: 83*5d9d9091SRichard Lowe	deccc	%o3 84*5d9d9091SRichard Lowe	bgeu,a	%ncc, 1b 85*5d9d9091SRichard Lowe	ldub	[%o0], %o4 86*5d9d9091SRichard Lowe 87*5d9d9091SRichard Lowe	! Check to see if there are more bytes to compare 88*5d9d9091SRichard Lowe	cmp	%o2, 0		! is o2 > 0 89*5d9d9091SRichard Lowe	bgu,a	%ncc, .blkchk	! we should already be dbl aligned 90*5d9d9091SRichard Lowe	cmp	%o2, 320	! if cnt < 256 + 64 - no Block ld/st 91*5d9d9091SRichard Lowe.cmpeq: 92*5d9d9091SRichard Lowe	retl			! strings compare equal 93*5d9d9091SRichard Lowe	sub	%g0, %g0, %o0 94*5d9d9091SRichard Lowe 95*5d9d9091SRichard Lowe.noteq: 96*5d9d9091SRichard Lowe	retl			! strings aren't equal 97*5d9d9091SRichard Lowe	sub	%o4, %o5, %o0	! return(*s1 - *s2) 98*5d9d9091SRichard Lowe 99*5d9d9091SRichard Lowe 100*5d9d9091SRichard Lowe	! Now src1 is Double word aligned 101*5d9d9091SRichard Lowe.blkchk: 102*5d9d9091SRichard Lowe	bgeu,a	%ncc, blkcmp		! do block cmp 103*5d9d9091SRichard Lowe	andcc	%o0, 63, %o3		! is src1 block aligned 104*5d9d9091SRichard Lowe 105*5d9d9091SRichard Lowe	! double word compare - using ldd and faligndata. Compares up to 106*5d9d9091SRichard Lowe	! 8 byte multiple count and does byte compare for the residual. 107*5d9d9091SRichard Lowe 108*5d9d9091SRichard Lowe.dwcmp: 109*5d9d9091SRichard Lowe 110*5d9d9091SRichard Lowe	rd	%fprs, %o3		! o3 = fprs 111*5d9d9091SRichard Lowe 112*5d9d9091SRichard Lowe	! if fprs.fef == 0, set it. Checking it, requires 2 instructions. 113*5d9d9091SRichard Lowe	! So set it anyway, without checking. 114*5d9d9091SRichard Lowe	wr	%g0, 0x4, %fprs		! fprs.fef = 1 115*5d9d9091SRichard Lowe 116*5d9d9091SRichard Lowe	andn	%o2, 7, %o4		!
o4 has 8 byte aligned cnt 117*5d9d9091SRichard Lowe sub %o4, 8, %o4 118*5d9d9091SRichard Lowe alignaddr %o1, %g0, %g1 119*5d9d9091SRichard Lowe ldd [%g1], %d0 120*5d9d9091SRichard Lowe4: 121*5d9d9091SRichard Lowe add %g1, 8, %g1 122*5d9d9091SRichard Lowe ldd [%g1], %d2 123*5d9d9091SRichard Lowe ldd [%o0], %d6 124*5d9d9091SRichard Lowe faligndata %d0, %d2, %d8 125*5d9d9091SRichard Lowe fcmpne32 %d6, %d8, %o5 126*5d9d9091SRichard Lowe fsrc1 %d6, %d6 ! 2 fsrc1's added since o5 cannot 127*5d9d9091SRichard Lowe fsrc1 %d8, %d8 ! be used for 3 cycles else we 128*5d9d9091SRichard Lowe fmovd %d2, %d0 ! create 9 bubbles in the pipeline 129*5d9d9091SRichard Lowe brnz,a,pn %o5, 6f 130*5d9d9091SRichard Lowe sub %o1, %o0, %o1 ! o1 gets the difference 131*5d9d9091SRichard Lowe subcc %o4, 8, %o4 132*5d9d9091SRichard Lowe add %o0, 8, %o0 133*5d9d9091SRichard Lowe add %o1, 8, %o1 134*5d9d9091SRichard Lowe bgu,pt %ncc, 4b 135*5d9d9091SRichard Lowe sub %o2, 8, %o2 136*5d9d9091SRichard Lowe 137*5d9d9091SRichard Lowe.residcmp: 138*5d9d9091SRichard Lowe ba 6f 139*5d9d9091SRichard Lowe sub %o1, %o0, %o1 ! o1 gets the difference 140*5d9d9091SRichard Lowe 141*5d9d9091SRichard Lowe5: ldub [%o0 + %o1], %o5 ! byte compare loop 142*5d9d9091SRichard Lowe inc %o0 143*5d9d9091SRichard Lowe cmp %o4, %o5 144*5d9d9091SRichard Lowe bne %ncc, .dnoteq 145*5d9d9091SRichard Lowe6: 146*5d9d9091SRichard Lowe deccc %o2 147*5d9d9091SRichard Lowe bgeu,a %ncc, 5b 148*5d9d9091SRichard Lowe ldub [%o0], %o4 149*5d9d9091SRichard Lowe 150*5d9d9091SRichard Lowe and %o3, 0x4, %o3 ! fprs.du = fprs.dl = 0 151*5d9d9091SRichard Lowe wr %o3, %g0, %fprs ! fprs = o3 - restore fprs 152*5d9d9091SRichard Lowe retl 153*5d9d9091SRichard Lowe sub %g0, %g0, %o0 ! strings compare equal 154*5d9d9091SRichard Lowe 155*5d9d9091SRichard Lowe.dnoteq: 156*5d9d9091SRichard Lowe and %o3, 0x4, %o3 ! fprs.du = fprs.dl = 0 157*5d9d9091SRichard Lowe wr %o3, %g0, %fprs ! 
fprs = o3 - restore fprs 158*5d9d9091SRichard Lowe	retl 159*5d9d9091SRichard Lowe	sub	%o4, %o5, %o0	! return(*s1 - *s2) 160*5d9d9091SRichard Lowe 161*5d9d9091SRichard Lowe 162*5d9d9091SRichard Loweblkcmp: 163*5d9d9091SRichard Lowe	save	%sp, -SA(MINFRAME), %sp 164*5d9d9091SRichard Lowe	rd	%fprs, %l5		! l5 = fprs 165*5d9d9091SRichard Lowe 166*5d9d9091SRichard Lowe	! if fprs.fef == 0, set it. Checking it, requires 2 instructions. 167*5d9d9091SRichard Lowe	! So set it anyway, without checking. 168*5d9d9091SRichard Lowe	wr	%g0, 0x4, %fprs		! fprs.fef = 1 169*5d9d9091SRichard Lowe 170*5d9d9091SRichard Lowe	bz,pn	%ncc, .blalign		! now block aligned 171*5d9d9091SRichard Lowe	sub	%i3, 64, %i3 172*5d9d9091SRichard Lowe	neg	%i3			! bytes till block aligned 173*5d9d9091SRichard Lowe 174*5d9d9091SRichard Lowe	! Compare %i3 bytes till dst is block (64 byte) aligned. use 175*5d9d9091SRichard Lowe	! double word compares. 176*5d9d9091SRichard Lowe 177*5d9d9091SRichard Lowe	alignaddr %i1, %g0, %g1 178*5d9d9091SRichard Lowe	ldd	[%g1], %d0 179*5d9d9091SRichard Lowe7: 180*5d9d9091SRichard Lowe	add	%g1, 8, %g1 181*5d9d9091SRichard Lowe	ldd	[%g1], %d2 182*5d9d9091SRichard Lowe	ldd	[%i0], %d6 183*5d9d9091SRichard Lowe	faligndata %d0, %d2, %d8 184*5d9d9091SRichard Lowe	fcmpne32 %d6, %d8, %i5 185*5d9d9091SRichard Lowe	fsrc1	%d6, %d6		! 2 fsrc1's added since i5 cannot 186*5d9d9091SRichard Lowe	fsrc1	%d8, %d8		! be used for 3 cycles else we 187*5d9d9091SRichard Lowe	fmovd	%d2, %d0		! create 9 bubbles in the pipeline 188*5d9d9091SRichard Lowe	brnz,a,pn %i5, .remcmp 189*5d9d9091SRichard Lowe	sub	%i1, %i0, %i1		! i1 gets the difference 190*5d9d9091SRichard Lowe	subcc	%i3, 8, %i3 191*5d9d9091SRichard Lowe	add	%i0, 8, %i0 192*5d9d9091SRichard Lowe	add	%i1, 8, %i1 193*5d9d9091SRichard Lowe	bgu,pt	%ncc, 7b 194*5d9d9091SRichard Lowe	sub	%i2, 8, %i2 195*5d9d9091SRichard Lowe 196*5d9d9091SRichard Lowe.blalign: 197*5d9d9091SRichard Lowe 198*5d9d9091SRichard Lowe	!
src1 is block aligned 199*5d9d9091SRichard Lowe membar #StoreLoad 200*5d9d9091SRichard Lowe srl %i1, 3, %l6 ! bits 3,4,5 are now least sig in %l6 201*5d9d9091SRichard Lowe andcc %l6, 7, %l6 ! mask everything except bits 1,2 3 202*5d9d9091SRichard Lowe andn %i2, 63, %i3 ! calc number of blocks 203*5d9d9091SRichard Lowe alignaddr %i1, %g0, %g0 ! gen %gsr 204*5d9d9091SRichard Lowe andn %i1, 0x3F, %l7 ! blk aligned address 205*5d9d9091SRichard Lowe sub %i2, %i3, %l2 206*5d9d9091SRichard Lowe andn %l2, 7, %i4 ! calc doubles left after blkcpy 207*5d9d9091SRichard Lowe 208*5d9d9091SRichard Lowe be,a %ncc, 1f ! branch taken if src2 is 64-byte aligned 209*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d0 210*5d9d9091SRichard Lowe 211*5d9d9091SRichard Lowe call .+8 ! get the address of this instruction in %o7 212*5d9d9091SRichard Lowe sll %l6, 2, %l4 213*5d9d9091SRichard Lowe add %o7, %l4, %o7 214*5d9d9091SRichard Lowe jmp %o7 + 16 ! jump to the starting ldd instruction 215*5d9d9091SRichard Lowe nop 216*5d9d9091SRichard Lowe ldd [%l7+8], %d2 217*5d9d9091SRichard Lowe ldd [%l7+16], %d4 218*5d9d9091SRichard Lowe ldd [%l7+24], %d6 219*5d9d9091SRichard Lowe ldd [%l7+32], %d8 220*5d9d9091SRichard Lowe ldd [%l7+40], %d10 221*5d9d9091SRichard Lowe ldd [%l7+48], %d12 222*5d9d9091SRichard Lowe ldd [%l7+56], %d14 223*5d9d9091SRichard Lowe1: 224*5d9d9091SRichard Lowe add %l7, 64, %l7 225*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d16 226*5d9d9091SRichard Lowe add %l7, 64, %l7 227*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 228*5d9d9091SRichard Lowe sub %i3, 128, %i3 229*5d9d9091SRichard Lowe 230*5d9d9091SRichard Lowe ! switch statement to get us to the right 8 byte blk within a 231*5d9d9091SRichard Lowe ! 
64 byte block 232*5d9d9091SRichard Lowe 233*5d9d9091SRichard Lowe cmp %l6, 4 234*5d9d9091SRichard Lowe bgeu,a hlf 235*5d9d9091SRichard Lowe cmp %l6, 6 236*5d9d9091SRichard Lowe cmp %l6, 2 237*5d9d9091SRichard Lowe bgeu,a sqtr 238*5d9d9091SRichard Lowe nop 239*5d9d9091SRichard Lowe cmp %l6, 1 240*5d9d9091SRichard Lowe be,a seg1 241*5d9d9091SRichard Lowe nop 242*5d9d9091SRichard Lowe ba seg0 243*5d9d9091SRichard Lowe nop 244*5d9d9091SRichard Lowesqtr: 245*5d9d9091SRichard Lowe be,a seg2 246*5d9d9091SRichard Lowe nop 247*5d9d9091SRichard Lowe 248*5d9d9091SRichard Lowe ba,a seg3 249*5d9d9091SRichard Lowe nop 250*5d9d9091SRichard Lowe 251*5d9d9091SRichard Lowehlf: 252*5d9d9091SRichard Lowe bgeu,a fqtr 253*5d9d9091SRichard Lowe nop 254*5d9d9091SRichard Lowe cmp %l6, 5 255*5d9d9091SRichard Lowe be,a seg5 256*5d9d9091SRichard Lowe nop 257*5d9d9091SRichard Lowe ba seg4 258*5d9d9091SRichard Lowe nop 259*5d9d9091SRichard Lowefqtr: 260*5d9d9091SRichard Lowe be,a seg6 261*5d9d9091SRichard Lowe nop 262*5d9d9091SRichard Lowe ba seg7 263*5d9d9091SRichard Lowe nop 264*5d9d9091SRichard Lowe 265*5d9d9091SRichard Lowe! The fsrc1 instructions are to make sure that the results of the fcmpne32 266*5d9d9091SRichard Lowe! are used 3 cycles later - else spitfire adds 9 bubbles. 
267*5d9d9091SRichard Lowe 268*5d9d9091SRichard Lowe#define FCMPNE32_D32_D48 \ 269*5d9d9091SRichard Lowe fcmpne32 %d48, %d32, %l0 ;\ 270*5d9d9091SRichard Lowe fcmpne32 %d50, %d34, %l1 ;\ 271*5d9d9091SRichard Lowe fcmpne32 %d52, %d36, %l2 ;\ 272*5d9d9091SRichard Lowe fcmpne32 %d54, %d38, %l3 ;\ 273*5d9d9091SRichard Lowe brnz,a %l0, add ;\ 274*5d9d9091SRichard Lowe mov 0, %l4 ;\ 275*5d9d9091SRichard Lowe fcmpne32 %d56, %d40, %l0 ;\ 276*5d9d9091SRichard Lowe brnz,a %l1, add ;\ 277*5d9d9091SRichard Lowe mov 8, %l4 ;\ 278*5d9d9091SRichard Lowe fcmpne32 %d58, %d42, %l1 ;\ 279*5d9d9091SRichard Lowe brnz,a %l2, add ;\ 280*5d9d9091SRichard Lowe mov 16, %l4 ;\ 281*5d9d9091SRichard Lowe fcmpne32 %d60, %d44, %l2 ;\ 282*5d9d9091SRichard Lowe brnz,a %l3, add ;\ 283*5d9d9091SRichard Lowe mov 24, %l4 ;\ 284*5d9d9091SRichard Lowe fcmpne32 %d62, %d46, %l3 ;\ 285*5d9d9091SRichard Lowe brnz,a %l0, add ;\ 286*5d9d9091SRichard Lowe mov 32, %l4 ;\ 287*5d9d9091SRichard Lowe fsrc1 %d48, %d48 ;\ 288*5d9d9091SRichard Lowe brnz,a %l1, add ;\ 289*5d9d9091SRichard Lowe mov 40, %l4 ;\ 290*5d9d9091SRichard Lowe fsrc1 %d48, %d48 ;\ 291*5d9d9091SRichard Lowe brnz,a %l2, add ;\ 292*5d9d9091SRichard Lowe mov 48, %l4 ;\ 293*5d9d9091SRichard Lowe fsrc1 %d48, %d48 ;\ 294*5d9d9091SRichard Lowe brnz,a %l3, add ;\ 295*5d9d9091SRichard Lowe mov 56, %l4 296*5d9d9091SRichard Lowe 297*5d9d9091SRichard Loweadd: 298*5d9d9091SRichard Lowe add %l4, %i0, %i0 299*5d9d9091SRichard Lowe add %l4, %i1, %i1 300*5d9d9091SRichard Lowe ba .remcmp 301*5d9d9091SRichard Lowe sub %i1, %i0, %i1 302*5d9d9091SRichard Lowe 303*5d9d9091SRichard Lowe#define FALIGN_D0 \ 304*5d9d9091SRichard Lowe faligndata %d0, %d2, %d48 ;\ 305*5d9d9091SRichard Lowe faligndata %d2, %d4, %d50 ;\ 306*5d9d9091SRichard Lowe faligndata %d4, %d6, %d52 ;\ 307*5d9d9091SRichard Lowe faligndata %d6, %d8, %d54 ;\ 308*5d9d9091SRichard Lowe faligndata %d8, %d10, %d56 ;\ 309*5d9d9091SRichard Lowe faligndata %d10, %d12, %d58 ;\ 310*5d9d9091SRichard Lowe faligndata 
%d12, %d14, %d60 ;\ 311*5d9d9091SRichard Lowe faligndata %d14, %d16, %d62 312*5d9d9091SRichard Lowe 313*5d9d9091SRichard Lowe#define FALIGN_D16 \ 314*5d9d9091SRichard Lowe faligndata %d16, %d18, %d48 ;\ 315*5d9d9091SRichard Lowe faligndata %d18, %d20, %d50 ;\ 316*5d9d9091SRichard Lowe faligndata %d20, %d22, %d52 ;\ 317*5d9d9091SRichard Lowe faligndata %d22, %d24, %d54 ;\ 318*5d9d9091SRichard Lowe faligndata %d24, %d26, %d56 ;\ 319*5d9d9091SRichard Lowe faligndata %d26, %d28, %d58 ;\ 320*5d9d9091SRichard Lowe faligndata %d28, %d30, %d60 ;\ 321*5d9d9091SRichard Lowe faligndata %d30, %d0, %d62 322*5d9d9091SRichard Lowe 323*5d9d9091SRichard Loweseg0: 324*5d9d9091SRichard Lowe FALIGN_D0 325*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d0 326*5d9d9091SRichard Lowe add %l7, 64, %l7 327*5d9d9091SRichard Lowe FCMPNE32_D32_D48 328*5d9d9091SRichard Lowe add %i0, 64, %i0 329*5d9d9091SRichard Lowe add %i1, 64, %i1 330*5d9d9091SRichard Lowe subcc %i3, 64, %i3 331*5d9d9091SRichard Lowe bz,pn %ncc, 1f 332*5d9d9091SRichard Lowe sub %i2, 64, %i2 333*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 334*5d9d9091SRichard Lowe 335*5d9d9091SRichard Lowe FALIGN_D16 336*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d16 337*5d9d9091SRichard Lowe add %l7, 64, %l7 338*5d9d9091SRichard Lowe FCMPNE32_D32_D48 339*5d9d9091SRichard Lowe add %i0, 64, %i0 340*5d9d9091SRichard Lowe add %i1, 64, %i1 341*5d9d9091SRichard Lowe subcc %i3, 64, %i3 342*5d9d9091SRichard Lowe bz,pn %ncc, 0f 343*5d9d9091SRichard Lowe sub %i2, 64, %i2 344*5d9d9091SRichard Lowe 345*5d9d9091SRichard Lowe ba %ncc, seg0 346*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 347*5d9d9091SRichard Lowe 348*5d9d9091SRichard Lowe0: 349*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 350*5d9d9091SRichard Lowe membar #Sync 351*5d9d9091SRichard Lowe FALIGN_D0 352*5d9d9091SRichard Lowe FCMPNE32_D32_D48 353*5d9d9091SRichard Lowe add %i0, 64, %i0 354*5d9d9091SRichard Lowe add %i1, 64, %i1 355*5d9d9091SRichard Lowe ba %ncc, blkd16 
356*5d9d9091SRichard Lowe sub %i2, 64, %i2 357*5d9d9091SRichard Lowe 358*5d9d9091SRichard Lowe1: 359*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 360*5d9d9091SRichard Lowe membar #Sync 361*5d9d9091SRichard Lowe FALIGN_D16 362*5d9d9091SRichard Lowe FCMPNE32_D32_D48 363*5d9d9091SRichard Lowe add %i0, 64, %i0 364*5d9d9091SRichard Lowe add %i1, 64, %i1 365*5d9d9091SRichard Lowe ba %ncc, blkd0 366*5d9d9091SRichard Lowe sub %i2, 64, %i2 367*5d9d9091SRichard Lowe 368*5d9d9091SRichard Lowe#define FALIGN_D2 \ 369*5d9d9091SRichard Lowe faligndata %d2, %d4, %d48 ;\ 370*5d9d9091SRichard Lowe faligndata %d4, %d6, %d50 ;\ 371*5d9d9091SRichard Lowe faligndata %d6, %d8, %d52 ;\ 372*5d9d9091SRichard Lowe faligndata %d8, %d10, %d54 ;\ 373*5d9d9091SRichard Lowe faligndata %d10, %d12, %d56 ;\ 374*5d9d9091SRichard Lowe faligndata %d12, %d14, %d58 ;\ 375*5d9d9091SRichard Lowe faligndata %d14, %d16, %d60 ;\ 376*5d9d9091SRichard Lowe faligndata %d16, %d18, %d62 377*5d9d9091SRichard Lowe 378*5d9d9091SRichard Lowe#define FALIGN_D18 \ 379*5d9d9091SRichard Lowe faligndata %d18, %d20, %d48 ;\ 380*5d9d9091SRichard Lowe faligndata %d20, %d22, %d50 ;\ 381*5d9d9091SRichard Lowe faligndata %d22, %d24, %d52 ;\ 382*5d9d9091SRichard Lowe faligndata %d24, %d26, %d54 ;\ 383*5d9d9091SRichard Lowe faligndata %d26, %d28, %d56 ;\ 384*5d9d9091SRichard Lowe faligndata %d28, %d30, %d58 ;\ 385*5d9d9091SRichard Lowe faligndata %d30, %d0, %d60 ;\ 386*5d9d9091SRichard Lowe faligndata %d0, %d2, %d62 387*5d9d9091SRichard Lowe 388*5d9d9091SRichard Lowe 389*5d9d9091SRichard Loweseg1: 390*5d9d9091SRichard Lowe FALIGN_D2 391*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d0 392*5d9d9091SRichard Lowe add %l7, 64, %l7 393*5d9d9091SRichard Lowe FCMPNE32_D32_D48 394*5d9d9091SRichard Lowe add %i0, 64, %i0 395*5d9d9091SRichard Lowe add %i1, 64, %i1 396*5d9d9091SRichard Lowe subcc %i3, 64, %i3 397*5d9d9091SRichard Lowe bz,pn %ncc, 1f 398*5d9d9091SRichard Lowe sub %i2, 64, %i2 399*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 
400*5d9d9091SRichard Lowe 401*5d9d9091SRichard Lowe FALIGN_D18 402*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d16 403*5d9d9091SRichard Lowe add %l7, 64, %l7 404*5d9d9091SRichard Lowe FCMPNE32_D32_D48 405*5d9d9091SRichard Lowe add %i0, 64, %i0 406*5d9d9091SRichard Lowe add %i1, 64, %i1 407*5d9d9091SRichard Lowe subcc %i3, 64, %i3 408*5d9d9091SRichard Lowe bz,pn %ncc, 0f 409*5d9d9091SRichard Lowe sub %i2, 64, %i2 410*5d9d9091SRichard Lowe 411*5d9d9091SRichard Lowe ba %ncc, seg1 412*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 413*5d9d9091SRichard Lowe 414*5d9d9091SRichard Lowe0: 415*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 416*5d9d9091SRichard Lowe membar #Sync 417*5d9d9091SRichard Lowe FALIGN_D2 418*5d9d9091SRichard Lowe FCMPNE32_D32_D48 419*5d9d9091SRichard Lowe add %i0, 64, %i0 420*5d9d9091SRichard Lowe add %i1, 64, %i1 421*5d9d9091SRichard Lowe ba %ncc, blkd18 422*5d9d9091SRichard Lowe sub %i2, 64, %i2 423*5d9d9091SRichard Lowe 424*5d9d9091SRichard Lowe1: 425*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 426*5d9d9091SRichard Lowe membar #Sync 427*5d9d9091SRichard Lowe FALIGN_D18 428*5d9d9091SRichard Lowe FCMPNE32_D32_D48 429*5d9d9091SRichard Lowe add %i0, 64, %i0 430*5d9d9091SRichard Lowe add %i1, 64, %i1 431*5d9d9091SRichard Lowe ba %ncc, blkd2 432*5d9d9091SRichard Lowe sub %i2, 64, %i2 433*5d9d9091SRichard Lowe 434*5d9d9091SRichard Lowe#define FALIGN_D4 \ 435*5d9d9091SRichard Lowe faligndata %d4, %d6, %d48 ;\ 436*5d9d9091SRichard Lowe faligndata %d6, %d8, %d50 ;\ 437*5d9d9091SRichard Lowe faligndata %d8, %d10, %d52 ;\ 438*5d9d9091SRichard Lowe faligndata %d10, %d12, %d54 ;\ 439*5d9d9091SRichard Lowe faligndata %d12, %d14, %d56 ;\ 440*5d9d9091SRichard Lowe faligndata %d14, %d16, %d58 ;\ 441*5d9d9091SRichard Lowe faligndata %d16, %d18, %d60 ;\ 442*5d9d9091SRichard Lowe faligndata %d18, %d20, %d62 443*5d9d9091SRichard Lowe 444*5d9d9091SRichard Lowe#define FALIGN_D20 \ 445*5d9d9091SRichard Lowe faligndata %d20, %d22, %d48 ;\ 446*5d9d9091SRichard Lowe 
faligndata %d22, %d24, %d50 ;\ 447*5d9d9091SRichard Lowe faligndata %d24, %d26, %d52 ;\ 448*5d9d9091SRichard Lowe faligndata %d26, %d28, %d54 ;\ 449*5d9d9091SRichard Lowe faligndata %d28, %d30, %d56 ;\ 450*5d9d9091SRichard Lowe faligndata %d30, %d0, %d58 ;\ 451*5d9d9091SRichard Lowe faligndata %d0, %d2, %d60 ;\ 452*5d9d9091SRichard Lowe faligndata %d2, %d4, %d62 453*5d9d9091SRichard Lowe 454*5d9d9091SRichard Loweseg2: 455*5d9d9091SRichard Lowe FALIGN_D4 456*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d0 457*5d9d9091SRichard Lowe add %l7, 64, %l7 458*5d9d9091SRichard Lowe FCMPNE32_D32_D48 459*5d9d9091SRichard Lowe add %i0, 64, %i0 460*5d9d9091SRichard Lowe add %i1, 64, %i1 461*5d9d9091SRichard Lowe subcc %i3, 64, %i3 462*5d9d9091SRichard Lowe bz,pn %ncc, 1f 463*5d9d9091SRichard Lowe sub %i2, 64, %i2 464*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 465*5d9d9091SRichard Lowe 466*5d9d9091SRichard Lowe FALIGN_D20 467*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d16 468*5d9d9091SRichard Lowe add %l7, 64, %l7 469*5d9d9091SRichard Lowe FCMPNE32_D32_D48 470*5d9d9091SRichard Lowe add %i0, 64, %i0 471*5d9d9091SRichard Lowe add %i1, 64, %i1 472*5d9d9091SRichard Lowe subcc %i3, 64, %i3 473*5d9d9091SRichard Lowe bz,pn %ncc, 0f 474*5d9d9091SRichard Lowe sub %i2, 64, %i2 475*5d9d9091SRichard Lowe 476*5d9d9091SRichard Lowe ba %ncc, seg2 477*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 478*5d9d9091SRichard Lowe 479*5d9d9091SRichard Lowe0: 480*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 481*5d9d9091SRichard Lowe membar #Sync 482*5d9d9091SRichard Lowe FALIGN_D4 483*5d9d9091SRichard Lowe FCMPNE32_D32_D48 484*5d9d9091SRichard Lowe add %i0, 64, %i0 485*5d9d9091SRichard Lowe add %i1, 64, %i1 486*5d9d9091SRichard Lowe ba %ncc, blkd20 487*5d9d9091SRichard Lowe sub %i2, 64, %i2 488*5d9d9091SRichard Lowe 489*5d9d9091SRichard Lowe1: 490*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 491*5d9d9091SRichard Lowe membar #Sync 492*5d9d9091SRichard Lowe FALIGN_D20 493*5d9d9091SRichard Lowe 
FCMPNE32_D32_D48 494*5d9d9091SRichard Lowe add %i0, 64, %i0 495*5d9d9091SRichard Lowe add %i1, 64, %i1 496*5d9d9091SRichard Lowe ba %ncc, blkd4 497*5d9d9091SRichard Lowe sub %i2, 64, %i2 498*5d9d9091SRichard Lowe 499*5d9d9091SRichard Lowe#define FALIGN_D6 \ 500*5d9d9091SRichard Lowe faligndata %d6, %d8, %d48 ;\ 501*5d9d9091SRichard Lowe faligndata %d8, %d10, %d50 ;\ 502*5d9d9091SRichard Lowe faligndata %d10, %d12, %d52 ;\ 503*5d9d9091SRichard Lowe faligndata %d12, %d14, %d54 ;\ 504*5d9d9091SRichard Lowe faligndata %d14, %d16, %d56 ;\ 505*5d9d9091SRichard Lowe faligndata %d16, %d18, %d58 ;\ 506*5d9d9091SRichard Lowe faligndata %d18, %d20, %d60 ;\ 507*5d9d9091SRichard Lowe faligndata %d20, %d22, %d62 508*5d9d9091SRichard Lowe 509*5d9d9091SRichard Lowe#define FALIGN_D22 \ 510*5d9d9091SRichard Lowe faligndata %d22, %d24, %d48 ;\ 511*5d9d9091SRichard Lowe faligndata %d24, %d26, %d50 ;\ 512*5d9d9091SRichard Lowe faligndata %d26, %d28, %d52 ;\ 513*5d9d9091SRichard Lowe faligndata %d28, %d30, %d54 ;\ 514*5d9d9091SRichard Lowe faligndata %d30, %d0, %d56 ;\ 515*5d9d9091SRichard Lowe faligndata %d0, %d2, %d58 ;\ 516*5d9d9091SRichard Lowe faligndata %d2, %d4, %d60 ;\ 517*5d9d9091SRichard Lowe faligndata %d4, %d6, %d62 518*5d9d9091SRichard Lowe 519*5d9d9091SRichard Lowe 520*5d9d9091SRichard Loweseg3: 521*5d9d9091SRichard Lowe FALIGN_D6 522*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d0 523*5d9d9091SRichard Lowe add %l7, 64, %l7 524*5d9d9091SRichard Lowe FCMPNE32_D32_D48 525*5d9d9091SRichard Lowe add %i0, 64, %i0 526*5d9d9091SRichard Lowe add %i1, 64, %i1 527*5d9d9091SRichard Lowe subcc %i3, 64, %i3 528*5d9d9091SRichard Lowe bz,pn %ncc, 1f 529*5d9d9091SRichard Lowe sub %i2, 64, %i2 530*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 531*5d9d9091SRichard Lowe 532*5d9d9091SRichard Lowe FALIGN_D22 533*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d16 534*5d9d9091SRichard Lowe add %l7, 64, %l7 535*5d9d9091SRichard Lowe FCMPNE32_D32_D48 536*5d9d9091SRichard Lowe add %i0, 64, %i0 
537*5d9d9091SRichard Lowe add %i1, 64, %i1 538*5d9d9091SRichard Lowe subcc %i3, 64, %i3 539*5d9d9091SRichard Lowe bz,pn %ncc, 0f 540*5d9d9091SRichard Lowe sub %i2, 64, %i2 541*5d9d9091SRichard Lowe 542*5d9d9091SRichard Lowe ba %ncc, seg3 543*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 544*5d9d9091SRichard Lowe 545*5d9d9091SRichard Lowe 546*5d9d9091SRichard Lowe0: 547*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 548*5d9d9091SRichard Lowe membar #Sync 549*5d9d9091SRichard Lowe FALIGN_D6 550*5d9d9091SRichard Lowe FCMPNE32_D32_D48 551*5d9d9091SRichard Lowe add %i0, 64, %i0 552*5d9d9091SRichard Lowe add %i1, 64, %i1 553*5d9d9091SRichard Lowe ba %ncc, blkd22 554*5d9d9091SRichard Lowe sub %i2, 64, %i2 555*5d9d9091SRichard Lowe 556*5d9d9091SRichard Lowe1: 557*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 558*5d9d9091SRichard Lowe membar #Sync 559*5d9d9091SRichard Lowe FALIGN_D22 560*5d9d9091SRichard Lowe FCMPNE32_D32_D48 561*5d9d9091SRichard Lowe add %i0, 64, %i0 562*5d9d9091SRichard Lowe add %i1, 64, %i1 563*5d9d9091SRichard Lowe ba %ncc, blkd6 564*5d9d9091SRichard Lowe sub %i2, 64, %i2 565*5d9d9091SRichard Lowe 566*5d9d9091SRichard Lowe#define FALIGN_D8 \ 567*5d9d9091SRichard Lowe faligndata %d8, %d10, %d48 ;\ 568*5d9d9091SRichard Lowe faligndata %d10, %d12, %d50 ;\ 569*5d9d9091SRichard Lowe faligndata %d12, %d14, %d52 ;\ 570*5d9d9091SRichard Lowe faligndata %d14, %d16, %d54 ;\ 571*5d9d9091SRichard Lowe faligndata %d16, %d18, %d56 ;\ 572*5d9d9091SRichard Lowe faligndata %d18, %d20, %d58 ;\ 573*5d9d9091SRichard Lowe faligndata %d20, %d22, %d60 ;\ 574*5d9d9091SRichard Lowe faligndata %d22, %d24, %d62 575*5d9d9091SRichard Lowe 576*5d9d9091SRichard Lowe#define FALIGN_D24 \ 577*5d9d9091SRichard Lowe faligndata %d24, %d26, %d48 ;\ 578*5d9d9091SRichard Lowe faligndata %d26, %d28, %d50 ;\ 579*5d9d9091SRichard Lowe faligndata %d28, %d30, %d52 ;\ 580*5d9d9091SRichard Lowe faligndata %d30, %d0, %d54 ;\ 581*5d9d9091SRichard Lowe faligndata %d0, %d2, %d56 ;\ 
582*5d9d9091SRichard Lowe faligndata %d2, %d4, %d58 ;\ 583*5d9d9091SRichard Lowe faligndata %d4, %d6, %d60 ;\ 584*5d9d9091SRichard Lowe faligndata %d6, %d8, %d62 585*5d9d9091SRichard Lowe 586*5d9d9091SRichard Lowe 587*5d9d9091SRichard Loweseg4: 588*5d9d9091SRichard Lowe FALIGN_D8 589*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d0 590*5d9d9091SRichard Lowe add %l7, 64, %l7 591*5d9d9091SRichard Lowe FCMPNE32_D32_D48 592*5d9d9091SRichard Lowe add %i0, 64, %i0 593*5d9d9091SRichard Lowe add %i1, 64, %i1 594*5d9d9091SRichard Lowe subcc %i3, 64, %i3 595*5d9d9091SRichard Lowe bz,pn %ncc, 1f 596*5d9d9091SRichard Lowe sub %i2, 64, %i2 597*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 598*5d9d9091SRichard Lowe 599*5d9d9091SRichard Lowe FALIGN_D24 600*5d9d9091SRichard Lowe ldda [%l7]ASI_BLK_P, %d16 601*5d9d9091SRichard Lowe add %l7, 64, %l7 602*5d9d9091SRichard Lowe FCMPNE32_D32_D48 603*5d9d9091SRichard Lowe add %i0, 64, %i0 604*5d9d9091SRichard Lowe add %i1, 64, %i1 605*5d9d9091SRichard Lowe subcc %i3, 64, %i3 606*5d9d9091SRichard Lowe bz,pn %ncc, 0f 607*5d9d9091SRichard Lowe sub %i2, 64, %i2 608*5d9d9091SRichard Lowe 609*5d9d9091SRichard Lowe ba %ncc, seg4 610*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 611*5d9d9091SRichard Lowe 612*5d9d9091SRichard Lowe 613*5d9d9091SRichard Lowe0: 614*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 615*5d9d9091SRichard Lowe membar #Sync 616*5d9d9091SRichard Lowe FALIGN_D8 617*5d9d9091SRichard Lowe FCMPNE32_D32_D48 618*5d9d9091SRichard Lowe add %i0, 64, %i0 619*5d9d9091SRichard Lowe add %i1, 64, %i1 620*5d9d9091SRichard Lowe ba %ncc, blkd24 621*5d9d9091SRichard Lowe sub %i2, 64, %i2 622*5d9d9091SRichard Lowe 623*5d9d9091SRichard Lowe1: 624*5d9d9091SRichard Lowe ldda [%i0]ASI_BLK_P, %d32 625*5d9d9091SRichard Lowe membar #Sync 626*5d9d9091SRichard Lowe FALIGN_D24 627*5d9d9091SRichard Lowe FCMPNE32_D32_D48 628*5d9d9091SRichard Lowe add %i0, 64, %i0 629*5d9d9091SRichard Lowe add %i1, 64, %i1 630*5d9d9091SRichard Lowe ba %ncc, blkd8 
	! Delay slot / tail of the preceding unrolled segment (its body
	! starts before this chunk): count down total bytes remaining.
	sub	%i2, 64, %i2

/*
 * FALIGN_Dnn: realign one 64-byte block of source-2 data.
 * Each macro emits eight faligndata ops that merge adjacent double
 * pairs (%dNN,%dNN+2), (%dNN+2,%dNN+4), ... into %d48-%d62, using the
 * byte offset programmed into the GSR earlier (by alignaddr, outside
 * this chunk -- the setup code is not visible here).  The starting
 * register in the macro name (D10, D26, ...) tracks where the rotating
 * ldda double-buffer last deposited source-2 data.
 */
#define	FALIGN_D10			\
	faligndata %d10, %d12, %d48	;\
	faligndata %d12, %d14, %d50	;\
	faligndata %d14, %d16, %d52	;\
	faligndata %d16, %d18, %d54	;\
	faligndata %d18, %d20, %d56	;\
	faligndata %d20, %d22, %d58	;\
	faligndata %d22, %d24, %d60	;\
	faligndata %d24, %d26, %d62

#define	FALIGN_D26			\
	faligndata %d26, %d28, %d48	;\
	faligndata %d28, %d30, %d50	;\
	faligndata %d30, %d0, %d52	;\
	faligndata %d0, %d2, %d54	;\
	faligndata %d2, %d4, %d56	;\
	faligndata %d4, %d6, %d58	;\
	faligndata %d6, %d8, %d60	;\
	faligndata %d8, %d10, %d62

/*
 * seg5 (and seg6/seg7 below): unrolled 64-byte block-compare loops,
 * two blocks per iteration.  Per half-iteration:
 *   - realign the previously loaded source-2 data into %d48-%d62
 *     (FALIGN_Dnn),
 *   - block-load the next 64 aligned bytes of source 2 ([%l7],
 *     ASI_BLK_P) into the other half of the FP double buffer,
 *   - compare against the 64 bytes of source 1 sitting in %d32-%d46
 *     (FCMPNE32_D32_D48 is a macro defined earlier in the file, not
 *     visible in this chunk; presumably it fcmpne32's %d32..%d46
 *     against %d48..%d62 and exits on miscompare -- confirm against
 *     its definition),
 *   - advance both byte pointers and decrement both the block-phase
 *     count (%i3) and the total remaining count (%i2).
 * When %i3 hits zero, handler 1: (first half) or 0: (second half)
 * finishes the compare for the data still in flight after a membar
 * #Sync, then joins the matching blkdNN drain tail.
 */
seg5:
	FALIGN_D10
	ldda	[%l7]ASI_BLK_P, %d0	! refill low half of double buffer
	add	%l7, 64, %l7
	FCMPNE32_D32_D48		! compare s1 block vs realigned s2
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	subcc	%i3, 64, %i3		! block-phase trip count
	bz,pn	%ncc, 1f
	sub	%i2, 64, %i2		! (delay slot) total bytes left
	ldda	[%i0]ASI_BLK_P, %d32	! next 64 bytes of s1

	FALIGN_D26
	ldda	[%l7]ASI_BLK_P, %d16	! refill high half of double buffer
	add	%l7, 64, %l7
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	sub	%i2, 64, %i2		! (delay slot)

	ba	%ncc, seg5
	ldda	[%i0]ASI_BLK_P, %d32	! (delay slot) next s1 block

	! Block phase done after the FALIGN_D26 half: drain the last
	! in-flight block, then hand off to the 8-byte tails at blkd26.
0:
	ldda	[%i0]ASI_BLK_P, %d32
	membar	#Sync			! wait for block load to complete
	FALIGN_D10
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	ba	%ncc, blkd26
	sub	%i2, 64, %i2		! (delay slot)

	! Block phase done after the FALIGN_D10 half.
1:
	ldda	[%i0]ASI_BLK_P, %d32
	membar	#Sync
	FALIGN_D26
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	ba	%ncc, blkd10
	sub	%i2, 64, %i2		! (delay slot)

/* Same scheme as FALIGN_D10/D26, rotated by one double. */
#define	FALIGN_D12			\
	faligndata %d12, %d14, %d48	;\
	faligndata %d14, %d16, %d50	;\
	faligndata %d16, %d18, %d52	;\
	faligndata %d18, %d20, %d54	;\
	faligndata %d20, %d22, %d56	;\
	faligndata %d22, %d24, %d58	;\
	faligndata %d24, %d26, %d60	;\
	faligndata %d26, %d28, %d62

#define	FALIGN_D28			\
	faligndata %d28, %d30, %d48	;\
	faligndata %d30, %d0, %d50	;\
	faligndata %d0, %d2, %d52	;\
	faligndata %d2, %d4, %d54	;\
	faligndata %d4, %d6, %d56	;\
	faligndata %d6, %d8, %d58	;\
	faligndata %d8, %d10, %d60	;\
	faligndata %d10, %d12, %d62

/* Identical structure to seg5, with the register rotation one
 * double further along (D12/D28). */
seg6:
	FALIGN_D12
	ldda	[%l7]ASI_BLK_P, %d0
	add	%l7, 64, %l7
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	sub	%i2, 64, %i2		! (delay slot)
	ldda	[%i0]ASI_BLK_P, %d32

	FALIGN_D28
	ldda	[%l7]ASI_BLK_P, %d16
	add	%l7, 64, %l7
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	sub	%i2, 64, %i2		! (delay slot)

	ba	%ncc, seg6
	ldda	[%i0]ASI_BLK_P, %d32	! (delay slot)

0:
	ldda	[%i0]ASI_BLK_P, %d32
	membar	#Sync
	FALIGN_D12
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	ba	%ncc, blkd28
	sub	%i2, 64, %i2		! (delay slot)

1:
	ldda	[%i0]ASI_BLK_P, %d32
	membar	#Sync
	FALIGN_D28
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	ba	%ncc, blkd12
	sub	%i2, 64, %i2		! (delay slot)

/* Same scheme, rotated one more double (D14/D30). */
#define	FALIGN_D14			\
	faligndata %d14, %d16, %d48	;\
	faligndata %d16, %d18, %d50	;\
	faligndata %d18, %d20, %d52	;\
	faligndata %d20, %d22, %d54	;\
	faligndata %d22, %d24, %d56	;\
	faligndata %d24, %d26, %d58	;\
	faligndata %d26, %d28, %d60	;\
	faligndata %d28, %d30, %d62

#define	FALIGN_D30			\
	faligndata %d30, %d0, %d48	;\
	faligndata %d0, %d2, %d50	;\
	faligndata %d2, %d4, %d52	;\
	faligndata %d4, %d6, %d54	;\
	faligndata %d6, %d8, %d56	;\
	faligndata %d8, %d10, %d58	;\
	faligndata %d10, %d12, %d60	;\
	faligndata %d12, %d14, %d62

/* Identical structure to seg5/seg6 (rotation D14/D30). */
seg7:
	FALIGN_D14
	ldda	[%l7]ASI_BLK_P, %d0
	add	%l7, 64, %l7
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 1f
	sub	%i2, 64, %i2		! (delay slot)
	ldda	[%i0]ASI_BLK_P, %d32

	FALIGN_D30
	ldda	[%l7]ASI_BLK_P, %d16
	add	%l7, 64, %l7
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	subcc	%i3, 64, %i3
	bz,pn	%ncc, 0f
	sub	%i2, 64, %i2		! (delay slot)

	ba	%ncc, seg7
	ldda	[%i0]ASI_BLK_P, %d32	! (delay slot)

0:
	ldda	[%i0]ASI_BLK_P, %d32
	membar	#Sync
	FALIGN_D14
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	ba	%ncc, blkd30
	sub	%i2, 64, %i2		! (delay slot)

1:
	ldda	[%i0]ASI_BLK_P, %d32
	membar	#Sync
	FALIGN_D30
	FCMPNE32_D32_D48
	add	%i0, 64, %i0
	add	%i1, 64, %i1
	ba	%ncc, blkd14
	sub	%i2, 64, %i2		! (delay slot)

/*
 * blkdNN: 8-byte drain tails.  On entry, %dNN holds the next
 * unconsumed double of source-2 data left over in the FP register
 * file by the block loop.  Each tail compares one double of s1
 * (ldd [%i0]) against the realigned s2 double, as long as the
 * leftover count in %i4 permits (%i4's exact setup is outside this
 * chunk -- presumably bytes of s2 already ldda'ed past the compare
 * point; confirm against the prologue).  A miscompare (nonzero %l1)
 * or exhausted %i4 drops into .remcmp, which byte-compares whatever
 * remains.  The three fsrc1 no-op moves between fcmpne32 and the
 * branch appear to be fcmpne32 result-latency padding so %l1 is
 * valid when brnz reads it -- NOTE(review): confirm on the target
 * pipeline.  The tails fall through blkd0 -> blkd2 -> ... in order.
 */
blkd0:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d0, %d2, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd2:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d2, %d4, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd4:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d4, %d6, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd6:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d6, %d8, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd8:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d8, %d10, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd10:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d10, %d12, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd12:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d12, %d14, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

	! %d14 is the last double of this register bank; park it in %d0
	! and continue in the generic single-double loop (blkleft).
blkd14:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	ba,pt	%ncc, blkleft
	fmovd	%d14, %d0		! (delay slot)

blkd16:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d16, %d18, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd18:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d18, %d20, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd20:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d20, %d22, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd22:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d22, %d24, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd24:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d24, %d26, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd26:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d26, %d28, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

blkd28:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	faligndata %d28, %d30, %d48
	ldd	[%i0], %d32
	fcmpne32 %d32, %d48, %l1
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	fsrc1	%d32, %d32
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	sub	%i2, 8, %i2

	! Last double of the bank: park %d30 in %d0 and fall through.
blkd30:
	subcc	%i4, 8, %i4
	blu,a,pn %ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	fmovd	%d30, %d0

	! This loop handles doubles remaining that were not loaded(ldda`ed)
	! in the Block Compare loop: one ldd of s2 ([%l7]) per trip,
	! realigned against the previous double (%d0) and compared with s1.
blkleft:
	ldd	[%l7], %d2
	add	%l7, 8, %l7
	faligndata %d0, %d2, %d8
	ldd	[%i0], %d32
	fcmpne32 %d32, %d8, %l1
	fsrc1	%d2, %d0		! shift %d2 down for next trip; the
	fsrc1	%d2, %d0		! repeats appear to be latency
	fsrc1	%d2, %d0		! padding -- NOTE(review): confirm
	brnz,a	%l1, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference
	add	%i0, 8, %i0
	add	%i1, 8, %i1
	subcc	%i4, 8, %i4
	bgeu,pt	%ncc, blkleft
	sub	%i2, 8, %i2		! (delay slot)

	ba	%ncc, .remcmp
	sub	%i1, %i0, %i1		! i1 gets the difference

	! Final byte-compare loop.  Here %i1 holds (s2 - s1), so
	! [%i0 + %i1] addresses the s2 byte matching the s1 byte in %i4
	! (loaded in the annulled delay slot of the bgeu below).
6:	ldub	[%i0 + %i1], %i5	! byte compare loop
	inc	%i0
	cmp	%i4, %i5
	bne	%ncc, .bnoteq
.remcmp:
	deccc	%i2			! bytes remaining
	bgeu,a	%ncc, 6b
	ldub	[%i0], %i4		! (annulled delay slot) next s1 byte

	! All bytes equal: restore FP state saved in %l5 and return 0.
exit:
	and	%l5, 0x4, %l5		! fprs.du = fprs.dl = 0
	wr	%l5, %g0, %fprs		! fprs = l5 - restore fprs
	membar	#StoreLoad|#StoreStore
	ret
	restore	%g0, %g0, %o0		! (delay slot) return 0

	! Miscompare: restore FP state, return byte difference.
.bnoteq:
	and	%l5, 0x4, %l5		! fprs.du = fprs.dl = 0
	wr	%l5, %g0, %fprs		! fprs = l5 - restore fprs
	membar	#StoreLoad|#StoreStore
	sub	%i4, %i5, %i0		! return(*s1 - *s2)
	ret				! strings aren't equal
	restore	%i0, %g0, %o0		! (delay slot)

	SET_SIZE(memcmp)