/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 */

.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	! Register roles: r4 = buf (advancing), r5 = len (bytes remaining),
	! r6 = running one's-complement sum, r7 = saved original buf pointer
	! (used at 9: to undo the byte rotation applied for odd buffers).
	! The T bit carries the add-with-carry chain between addc insns.
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5		! T=1 iff len==0 (tested before the decrement)
	add	#-1, r5
	bt	9f		! len was 0: nothing to sum
	mov.b	@r4+, r0
	extu.b	r0, r0
	addc	r0, r6		! t=0 from previous tst
	! Byte-rotate the accumulated sum so the following word/long adds
	! fall in the correct halves; the rotation is undone at 9:.
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	clrt
	bra	6f
	add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0	! consume the aligning halfword
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! fold the carry from addc back in
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5: iterations of the 32-byte loop
	tst	r1, r1
	bt/s	4f		! if it's =0, go to 4f
	clrt
	.align	2
3:
	! Unrolled main loop: checksum 32 bytes per iteration.  The carry
	! (T bit) must survive dt/cmp below, so it is parked in r0 via movt
	! and regenerated with cmp/eq #1 in the branch delay slot.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save carry (T) across dt
	dt	r1
	bf/s	3b
	cmp/eq	#1, r0		! delay slot: restore T from saved carry
	! here, we know r1==0
	addc	r1, r6		! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0	! whole 4-byte words still pending (0..28 bytes)
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = word count
	mov	#0, r2		! first loop pass adds 0 (loop is software-pipelined)
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0		! same T-bit save/restore idiom as loop 3:
	dt	r1
	bf/s	5b
	cmp/eq	#1, r0
	addc	r2, r6		! add the final word loaded by the last pass
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f		! exactly 1 byte left
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! exactly 2 bytes left: add the halfword as-is
	clrt
	shll16	r0		! 3 bytes left: halfword goes in the high half
	addc	r0, r6
7:
	! last single byte; position depends on endianness
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! fold in the final carry
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0		! r7 = original buf pointer saved at entry
	tst	#1, r0
	bt	10f
	! odd start address: undo the byte rotation applied above
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	mov	r6, r0		! delay slot: return sum in r0

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
					int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

/* Wrap a source-side access: a fault in the tagged instruction jumps to
 * the 6001: fixup (reports via src_err_ptr and zeroes the destination). */
#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

/* Wrap a destination-side access: a fault jumps to the 6002: fixup
 * (reports via dst_err_ptr). */
#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
ENTRY(csum_partial_copy_generic)
	! Save dst and len: the .fixup code reloads them from the stack
	! to zero the destination buffer after a source fault.
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	clrt
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	mov	r6,r2		! delay slot: r2 = remaining length for tail code

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2		! r2 = original length (tail handled after loop)
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
	! Copy two bytes, assemble them into a halfword in host byte order,
	! and fold into the sum with carry.
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0	)
	extu.b	r1,r1
DST(	mov.b	r1,@r5	)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! preserve carry across dt (same idiom as csum_partial)
	dt	r6
	bf/s	5b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0, r7		! fold final carry

	mov	r2, r0
	tst	#1, r0		! odd total length: one trailing byte remains
	bt	7f
	bra	5f
	clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2		! r2 = remaining length for the tail stages
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5: 32-byte iterations
	tst	r6,r6
	bt/s	2f
	clrt
	.align	2
1:
	! Main copy+checksum loop: 32 bytes per iteration.  Loads are tagged
	! SRC and stores DST so faults route to the proper fixup handler.
SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! save carry across dt
	dt	r6
	bf/s	1b
	cmp/eq	#1,r0		! delay slot: restore carry into T
	mov	#0,r0
	addc	r0,r7		! fold final carry

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! whole words remaining (0..28 bytes)
	cmp/pl	r6
	bf/s	4f
	clrt
	shlr2	r6		! r6 = word count
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7		! fold final carry
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! 0..3 tail bytes
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! only 1 byte left
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! exactly 2 bytes: add halfword as-is
	clrt
	shll16	r0		! 3 bytes: halfword occupies the high half
	addc	r0,r7
5:
	! final single byte; position depends on endianness
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold final carry
7:
5000:
	! common exit: fixup handlers jump back here via 5000b

# Exception handler:
.section .fixup, "ax"

6001:
	! Source fault: report -EFAULT through *src_err_ptr.
	mov.l	@(8,r15),r0		! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5		! dst
	mov.l	@r15,r6			! len
	mov	#0,r7
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	add	#1,r5
	mov.l	8000f,r0		! resume at the common exit (5000:)
	jmp	@r0
	nop
	.align	2
8000:	.long	5000b

6002:
	! Destination fault: report -EFAULT through *dst_err_ptr.
	mov.l	@(12,r15),r0		! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0		! resume at the common exit (5000:)
	jmp	@r0
	nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15			! drop the saved dst/len
	rts
	mov	r7,r0			! delay slot: return sum in r0