/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
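/*
 * For reference only: a rough C-level sketch of the value csum_partial()
 * produces, i.e. a 32-bit ones'-complement partial sum of the buffer added
 * to the incoming sum.  This is not the kernel's generic implementation,
 * the name csum_partial_sketch is purely illustrative, and it ignores the
 * odd-start-address byte rotation handled in the assembler below; the asm
 * folds carries with adcl as it goes, so the 32-bit result can differ
 * bit-for-bit, but it is equivalent once folded down to 16 bits.
 *
 *	unsigned int csum_partial_sketch(const unsigned char *buff, int len,
 *					 unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 2) {		// add little-endian 16-bit words
 *			acc += *(const unsigned short *)buff;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len > 0)			// trailing byte in the low half
 *			acc += *buff;
 *		while (acc >> 32)		// fold the carries back in
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */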

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
ENTRY(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2 bytes.

	# buf is odd
	dec %ecx
	jl 8f
	movzbl (%esi), %ebx
	adcl %ebx, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testb $1, 12(%esp)
	jz 8f
	roll $8, %eax
8:
	popl %ebx
	popl %esi
	ret
ENDPROC(csum_partial)

#else

/* Version for PentiumII/PPro */

ENTRY(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $3, %esi
	jnz 25f
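	# The code at 10: below splits the length into full 128-byte blocks
	# (%ecx) and the dword remainder (%ebx = len & 0x7c), which is summed
	# first: %esi is advanced past those bytes, %ebx is converted to a
	# negated dword count, and the indirect jump enters the unrolled run
	# between 40: and 45: so that exactly that many additions execute.
	# Each addl/adcl in that run assembles to 3 bytes, hence the
	# "lea 45f(%ebx,%ebx,2), %ebx"; the testl just before the jump clears
	# the carry flag for the first addition.  The final len & 3 bytes are
	# handled at 50:.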
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi
	jmp *%ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	addl $2, %ecx
	jz 80f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	testb $1, 12(%esp)
	jz 90f
	roll $8, %eax
90:
	popl %ebx
	popl %esi
	ret
ENDPROC(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */
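/*
 * For reference only: in the fault-free case the routine below behaves
 * roughly like the C sketch here (the name csum_and_copy_sketch is purely
 * illustrative, not a kernel API).  When a source read faults, the code
 * stores -EFAULT through *src_err_ptr and zeroes the whole destination
 * rather than computing a partial result; when a destination write faults,
 * it stores -EFAULT through *dst_err_ptr.  The returned partial sum is only
 * meaningful if neither error pointer was written.
 *
 *	unsigned int csum_and_copy_sketch(const char *src, char *dst,
 *					  int len, int sum)
 *	{
 *		memcpy(dst, src, len);			// copy ...
 *		return csum_partial(dst, len, sum);	// ... while checksumming
 *	}
 */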
/*
 * Copy from ds while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction;
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(y...)			\
	9999: y;			\
	_ASM_EXTABLE(9999b, 6001f)

#define DST(y...)			\
	9999: y;			\
	_ASM_EXTABLE(9999b, 6002f)

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP	12

ENTRY(csum_partial_copy_generic)
	subl  $4,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	ret
ENDPROC(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ARGBASE 12

ENTRY(csum_partial_copy_generic)
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea  -1(%esi),%edx
	andl $-32,%edx
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi
	jmp *%ebx
1:	addl $64,%esi
	addl $64,%edi
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
SRC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
SRC(	movb (%esi), %dl	)
DST(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp 7b
.previous

	popl %esi
	popl %edi
	popl %ebx
	ret
ENDPROC(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)
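
/*
 * Note: both routines in this file return a 32-bit partial sum; they do not
 * produce the final 16-bit Internet checksum.  Folding the carries and
 * taking the ones' complement is left to the callers (csum_fold() in the
 * x86 checksum headers), roughly:
 *
 *	sum = (sum & 0xffff) + (sum >> 16);	// fold the high half in
 *	sum = (sum & 0xffff) + (sum >> 16);	// absorb a possible carry
 *	result = (unsigned short)~sum;		// ones' complement
 */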