/*	$NetBSD: in_cksum_arm.S,v 1.2 2003/09/23 10:01:36 scw Exp $	*/

/*-
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Hand-optimised in_cksum() and in4_cksum() implementations for ARM/armv5e
 */

#include "opt_inet.h"

#include <machine/asm.h>
#include "assym.inc"
	.syntax	unified
/*
 * int in_cksum(struct mbuf *m, int len)
 *
 * Entry:
 *	r0	m
 *	r1	len
 *
 * Returns:
 *	r0	16-bit ones-complement of the folded sum
 *
 * Register usage inside the function:
 *	r8	running 32-bit sum (end-around carry folded in after each add)
 *	r9	bytes of 'len' still to be summed
 *	r10	number of bytes consumed from the chain so far
 *	r11	(r10 ^ data pointer) for the current mbuf; bit 0 tells whether
 *		the partial sum has to be byte-swapped before accumulation
 *	ip	mbuf being processed / next mbuf in the chain
 *
 * M_LEN, M_DATA and M_NEXT are not defined in this file; they are presumably
 * the struct mbuf field offsets provided by assym.inc.
 *
 * NOTE: Assumes 'm' is *never* NULL.
 */
/* LINTSTUB: Func: int in_cksum(struct mbuf *, int) */
ENTRY(in_cksum)
	stmfd	sp!, {r4-r11,lr}
	mov	r8, #0x00		/* sum = 0 */
	mov	r9, r1			/* remaining = len */
	mov	r10, #0x00		/* offset = 0 */
	mov	ip, r0

.Lin_cksum_loop:
	ldr	r1, [ip, #(M_LEN)]
	ldr	r0, [ip, #(M_DATA)]
	ldr	ip, [ip, #(M_NEXT)]
.Lin_cksum_entry4:
	cmp	r9, r1
	movlt	r1, r9			/* r1 = min(remaining, chunk length) */
	sub	r9, r9, r1
	eor	r11, r10, r0		/* Parity of offset vs. address */
	add	r10, r10, r1
	adds	r2, r1, #0x00		/* r2 = r1; Z set for an empty chunk */
	blne	_ASM_LABEL(L_cksumdata)	/* Skipped (r2 == 0) when empty */
	tst	r11, #0x01
	movne	r2, r2, ror #8		/* Odd/even mismatch: swap byte lanes */
	adds	r8, r8, r2
	adc	r8, r8, #0x00		/* End-around carry */
	cmp	ip, #0x00
	bne	.Lin_cksum_loop

	/* Fold the 32-bit sum to 16 bits and complement it */
	mov	r1, #0xff
	orr	r1, r1, #0xff00		/* r1 = 0xffff */
	and	r0, r8, r1
	add	r0, r0, r8, lsr #16
	add	r0, r0, r0, lsr #16	/* Fold in the carry of the first fold */
	and	r0, r0, r1
	eor	r0, r0, r1
	ldmfd	sp!, {r4-r11,pc}
END(in_cksum)

/*
 * do_cksum: C-callable wrapper around L_cksumdata.
 *
 * Entry:
 *	r0	Pointer to buffer
 *	r1	Buffer length in bytes
 *
 * Returns:
 *	r0	Unfolded 32-bit partial sum of the buffer, or 0 when the
 *		length is <= 0
 *
 * r4-r7 are saved here because L_cksumdata clobbers r0-r7.
 *
 * L_cksumdata requires a length of at least one byte: its tail code always
 * loads and sums the byte at [r0].  in_cksum() filters out empty chunks
 * before calling it, so do the equivalent here rather than reading (and
 * summing) a byte that is not part of the buffer.
 */
ENTRY(do_cksum)
	stmfd	sp!, {r4-r7, lr}
	mov	r2, #0x00		/* Result for an empty buffer */
	cmp	r1, #0x00
	blgt	L_cksumdata		/* Only sum when len > 0 */
	mov	r0, r2
	ldmfd	sp!, {r4-r7, pc}
END(do_cksum)

/*
 * The main in*_cksum() workhorse...
 *
 * Entry parameters:
 *	r0	Pointer to buffer
 *	r1	Buffer length
 *	lr	Return address
 *
 * Returns:
 *	r2	Accumulated 32-bit sum
 *
 * Clobbers:
 *	r0-r7
 *
 * Precondition:
 *	r1 must be greater than zero.  With r1 == 0 control reaches
 *	.Lcksumdata_endgame, which unconditionally loads and sums the byte
 *	at [r0]; callers are expected to skip empty buffers themselves.
 */
/* LINTSTUB: Ignore */
ASENTRY_NP(L_cksumdata)
	pld	[r0]			/* Pre-fetch the start of the buffer */
	mov	r2, #0

	/* We first have to word-align the buffer.  */
	ands	r7, r0, #0x03
	beq	.Lcksumdata_wordaligned
	rsb	r7, r7, #0x04		/* r7 = bytes needed to reach alignment */
	cmp	r1, r7			/* Enough bytes left to make it? */
	blt	.Lcksumdata_endgame
	cmp	r7, #0x02		/* Flags stay live until the orr's below */
	ldrb	r4, [r0], #0x01		/* Fetch 1st byte */
	ldrbge	r5, [r0], #0x01		/* Fetch 2nd byte */
	movlt	r5, #0x00
	ldrbgt	r6, [r0], #0x01		/* Fetch 3rd byte */
	movle	r6, #0x00

	/* Combine the three bytes depending on endianness and alignment */
	orreq	r2, r4, r5, lsl #8	/* r7 == 2: started on an even address */
	orreq	r2, r2, r6, lsl #16
	orrne	r2, r5, r4, lsl #8	/* r7 == 1 or 3: started on an odd address */
	orrne	r2, r2, r6, lsl #24
	subs	r1, r1, r7		/* Update length */
	RETeq				/* All done? */

	/* Buffer is now word aligned */
.Lcksumdata_wordaligned:
	cmp	r1, #0x04		/* Less than 4 bytes left? */
	blt	.Lcksumdata_endgame	/* Yup */

	/* Now quad-align, if necessary */
	ands	r7, r0, #0x04		/* r7 = 0 if already quad aligned... */
	ldrne	r7, [r0], #0x04		/* ...else r7 = the odd word */
	subne	r1, r1, #0x04
	subs	r1, r1, #0x40
	blt	.Lcksumdata_bigloop_end	/* Note: C flag clear if branch taken */

	/*
	 * Buffer is now quad aligned. Sum 64 bytes at a time.
	 * Note: First ldrd is hoisted above the loop, together with
	 * setting r6 to zero to avoid stalling for results in the
	 * loop. (r7 is live, from above).
	 */
	ldrd	r4, [r0], #0x08
	mov	r6, #0x00
.Lcksumdata_bigloop:
	pld	[r0, #0x18]
	adds	r2, r2, r6
	adcs	r2, r2, r7
	ldrd	r6, [r0], #0x08
	adcs	r2, r2, r4
	adcs	r2, r2, r5
	ldrd	r4, [r0], #0x08
	adcs	r2, r2, r6
	adcs	r2, r2, r7
	ldrd	r6, [r0], #0x08
	adcs	r2, r2, r4
	adcs	r2, r2, r5
	ldrd	r4, [r0], #0x08
	adcs	r2, r2, r6
	adcs	r2, r2, r7
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	adcs	r2, r2, r4
	adcs	r2, r2, r5
	ldrd	r4, [r0], #0x08
	adcs	r2, r2, r6
	adcs	r2, r2, r7
	ldrd	r6, [r0], #0x08
	adcs	r2, r2, r4
	adcs	r2, r2, r5
	adc	r2, r2, #0x00
	subs	r1, r1, #0x40
	ldrdge	r4, [r0], #0x08		/* Hoisted load for the next iteration */
	bge	.Lcksumdata_bigloop

	adds	r2, r2, r6		/* r6/r7 still need summing */
.Lcksumdata_bigloop_end:
	adcs	r2, r2, r7
	adc	r2, r2, #0x00

	adds	r1, r1, #0x40		/* Undo the bias; r1 = bytes left (< 64) */
	RETeq
	cmp	r1, #0x20

	ldrdge	r4, [r0], #0x08		/* Avoid stalling pld and result */
	blt	.Lcksumdata_less_than_32
	pld	[r0, #0x18]
	ldrd	r6, [r0], #0x08
	adds	r2, r2, r4
	adcs	r2, r2, r5
	ldrd	r4, [r0], #0x08
	adcs	r2, r2, r6
	adcs	r2, r2, r7
	ldrd	r6, [r0], #0x08
	adcs	r2, r2, r4
	adcs	r2, r2, r5
	adcs	r2, r2, r6		/* XXX: Unavoidable result stall */
	adcs	r2, r2, r7
	adc	r2, r2, #0x00
	subs	r1, r1, #0x20
	RETeq

.Lcksumdata_less_than_32:
	/* There are less than 32 bytes left */
	and	r3, r1, #0x18		/* r3 = bytes in whole 8-byte chunks */
	rsb	r4, r3, #0x18
	sub	r1, r1, r3		/* r1 = 0..7 bytes after the chunks */
	/*
	 * Computed jump into the unrolled groups below.  Each group is three
	 * instructions (12 bytes) and handles 8 bytes of data, so the byte
	 * offset to skip is 1.5 * (0x18 - r3).  Reading pc yields the address
	 * of the add + 8 (ARM state), hence the nop pad; when the offset is
	 * zero the add is not executed and control falls through the nop.
	 */
	adds	r4, r4, r4, lsr #1	/* Side effect: Clear carry flag */
	addne	pc, pc, r4
	nop

/*
 * Note: We use ldm here, even on armv5e, since the combined issue/result
 * latencies for ldm and ldrd are the same. Using ldm avoids needless #ifdefs.
 */
	/* At least 24 bytes remaining... */
	ldmia	r0!, {r4, r5}
	adcs	r2, r2, r4
	adcs	r2, r2, r5

	/* At least 16 bytes remaining... */
	ldmia	r0!, {r4, r5}
	adcs	r2, r2, r4
	adcs	r2, r2, r5

	/* At least 8 bytes remaining... */
	ldmia	r0!, {r4, r5}
	adcs	r2, r2, r4
	adcs	r2, r2, r5

	/* Less than 8 bytes remaining... */
	adc	r2, r2, #0x00
	subs	r1, r1, #0x04
	blt	.Lcksumdata_lessthan4

	ldr	r4, [r0], #0x04
	sub	r1, r1, #0x04
	adds	r2, r2, r4
	adc	r2, r2, #0x00

	/* Deal with < 4 bytes remaining */
.Lcksumdata_lessthan4:
	adds	r1, r1, #0x04		/* Undo the bias; r1 = 0..3 bytes left */
	RETeq

	/* Deal with 1 to 3 remaining bytes, possibly misaligned */
.Lcksumdata_endgame:
	ldrb	r3, [r0]		/* Fetch first byte */
	cmp	r1, #0x02
	ldrbge	r4, [r0, #0x01]		/* Fetch 2nd and 3rd as necessary */
	movlt	r4, #0x00
	ldrbgt	r5, [r0, #0x02]
	movle	r5, #0x00
	/* Combine the three bytes depending on endianness and alignment */
	tst	r0, #0x01
	orreq	r3, r3, r4, lsl #8
	orreq	r3, r3, r5, lsl #16
	orrne	r3, r4, r3, lsl #8
	orrne	r3, r3, r5, lsl #24
	adds	r2, r2, r3
	adc	r2, r2, #0x00
	RET
END(L_cksumdata)