/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
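
/*
 * For reference, a rough C model of what the routine below computes; this
 * is an illustrative sketch only (hypothetical name, kernel u8/u16/u32
 * types and little-endian loads assumed), not the kernel's csum_partial,
 * and it glosses over the byte rotation the assembly performs when buff
 * starts at an odd address:
 *
 *	u32 csum_partial_model(const u8 *buff, int len, u32 sum)
 *	{
 *		while (len >= 2) {
 *			u16 w = buff[0] | (buff[1] << 8);  // 16-bit little-endian load
 *			sum += w;
 *			if (sum < w)		// emulate adcl: fold the carry back in
 *				sum++;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len) {			// trailing odd byte, low half of a word
 *			sum += buff[0];
 *			if (sum < buff[0])
 *				sum++;
 *		}
 *		return sum;			// still 32 bits wide; the caller folds it
 *	}
 *
 * The value returned by this sketch folds to the same 16-bit ones' complement
 * sum as the 32-bit-at-a-time accumulation done in the assembly below.
 */
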
.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	   /*
	    * Experiments with Ethernet and SLIP connections show that buff
	    * is aligned on either a 2-byte or 4-byte boundary.  We get at
	    * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	    * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	    * alignment for the unrolled loop.
	    */
ENTRY(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if 2-byte aligned.

	# buf is odd
	dec %ecx
	jl 8f
	movzbl (%esi), %ebx
	adcl %ebx, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testb $1, 12(%esp)
	jz 8f
	roll $8, %eax
8:
	popl %ebx
	popl %esi
	ret
ENDPROC(csum_partial)

#else

/* Version for PentiumII/PPro */

ENTRY(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi
	JMP_NOSPEC %ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	addl $2, %ecx
	jz 80f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	testb $1, 12(%esp)
	jz 90f
	roll $8, %eax
90:
	popl %ebx
	popl %esi
	ret
ENDPROC(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)
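
/*
 * Note that csum_partial() returns a 32-bit partial sum, not the final
 * 16-bit checksum.  As an illustration only (a sketch with a hypothetical
 * name, not the kernel's csum_fold), callers typically collapse it like
 * this:
 *
 *	u16 fold_model(u32 sum)
 *	{
 *		sum = (sum & 0xffff) + (sum >> 16);	// add the two halves
 *		sum = (sum & 0xffff) + (sum >> 16);	// absorb the carry, if any
 *		return ~sum;				// ones' complement result
 *	}
 */
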
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(y...)			\
	9999: y;			\
	_ASM_EXTABLE(9999b, 6001f)

#define DST(y...)			\
	9999: y;			\
	_ASM_EXTABLE(9999b, 6002f)
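
/*
 * For example, SRC(movl (%esi), %ebx) expands to roughly:
 *
 *	9999:	movl (%esi), %ebx
 *		_ASM_EXTABLE(9999b, 6001f)
 *
 * i.e. the load is tagged with an exception-table entry so that a fault on
 * the source operand lands in the 6001: fixup code below, while DST()
 * routes destination faults to 6002:.
 */
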
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP	12

ENTRY(csum_partial_copy_generic)
	subl  $4,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	ret
ENDPROC(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	) ;	\
	addl %ebx, %eax	  ;		\
	DST(movl %ebx, x(%edi)	) ;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	) ;	\
	adcl %ebx, %eax	  ;		\
	DST(movl %ebx, x(%edi)	) ;

#define ARGBASE 12

ENTRY(csum_partial_copy_generic)
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea  -1(%esi),%edx
	andl $-32,%edx
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi
	JMP_NOSPEC %ebx
1:	addl $64,%esi
	addl $64,%edi
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
SRC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
SRC(	movb (%esi), %dl	)
DST(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp 7b
.previous

	popl %esi
	popl %edi
	popl %ebx
	ret
ENDPROC(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)
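
/*
 * Roughly how a C caller might use csum_partial_copy_generic(); this is an
 * illustrative sketch only, not actual kernel caller code.  The error
 * handling mirrors the fixup code above, and fold_model() refers to the
 * folding sketch earlier in this file:
 *
 *	int src_err = 0, dst_err = 0;
 *	u32 sum = csum_partial_copy_generic(src, dst, len, 0,
 *					    &src_err, &dst_err);
 *	if (src_err)
 *		// the source faulted: *src_err_ptr was set to -EFAULT and
 *		// the whole destination buffer was zeroed
 *		return -EFAULT;
 *	if (dst_err)
 *		// the destination faulted: *dst_err_ptr was set to -EFAULT
 *		return -EFAULT;
 *	u16 csum = fold_model(sum);	// fold the 32-bit partial sum
 */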