/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 */
#include <asm/assembler.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len)
 *  r0 = src, r1 = dst, r2 = len
 *  Returns : r0 = checksum
 *
 * Copies len bytes from src to dst while accumulating a 32-bit
 * checksum of the copied data with add-with-carry.
 *
 * r3 ("sum") is NOT an input: it is the running checksum and is
 * seeded with -1 on entry, so whatever the caller left in r3 is
 * ignored.  A zero length therefore returns that seed unchanged.
 *
 * This file is a template.  FN_ENTRY, FN_EXIT, save_regs, load_regs
 * and the load1b/load2b/load1l/load2l/load4l accessors are not
 * defined here; they are presumably provided by the file that
 * includes this one (TODO confirm against the includer).
 *
 * Note that 'tst' and 'teq' preserve the carry flag.  The code
 * depends on this: the carry out of one 'adcs' is kept live across
 * the tests and branches until the next 'adcs'/'adc' consumes it.
 */

src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

		/*
		 * Zero-length exit: hand back the untouched running sum.
		 */
.Lzero:		mov	r0, sum
		load_regs

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 *
		 * Called with 'blne' from the entry code and returns
		 * through lr with dst 32-bit aligned; C carries the
		 * pending checksum carry back to the caller.
		 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit

		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		reteq	lr			@ dst is now 32bit aligned

.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		ret	lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 * (we arrive via 'blo', which is only taken with C clear).
		 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6
		beq	.Lless8_byteonly

		/* Copy byte pairs while bits 1-2 of len are non-zero. */
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6
		bne	1b
.Lless8_byteonly:
		tst	len, #1
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone

FN_ENTRY
		save_regs
		mov	sum, #-1		@ seed the running checksum

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = len rounded down to 16
		beq	2f

		/* 16 bytes per iteration. */
1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

		/* Remaining whole words: bit 3 of len = 8 bytes, bit 2 = 4. */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/* Trailing 1 to 3 bytes, taken from one more loaded word. */
4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2

		/*
		 * Common tail: if len is odd, r5 holds the final byte
		 * to store and to add to the checksum.
		 */
.Lexit:		tst	len, #1
		strbne	r5, [dst], #1
		andne	r5, r5, #255
		adcsne	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 *
		 * The final 'adc' folds the last pending carry into
		 * the result.  The original dst is re-read from the
		 * top of the stack; this assumes save_regs left it
		 * there (TODO confirm against the includer).
		 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs

		/*
		 * Source is not word aligned (dst is).  src is rounded
		 * down to a word boundary and whole words are loaded;
		 * r4 always holds the not-yet-stored bytes of the
		 * previous word, which are merged with the next word
		 * using the lspull/lspush shifts.  ip = src & 3 selects
		 * one of three variants: offset 1 (below), 2 and 3.
		 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned

		/* Source offset 1: shifts are 8 / 24. */
		mov	r4, r5, lspull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		mov	r6, r6, lspull #8
		orr	r6, r6, r7, lspush #24
		mov	r7, r7, lspull #8
		orr	r7, r7, r8, lspush #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #8
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #8
		/* Up to 3 trailing bytes are already held in r4. */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

		/*
		 * Source offset 2: shifts are 16 / 16.  The 'cmp' that
		 * selected this path left C set, so clear it again
		 * before the first 'adcs'.
		 */
.Lsrc2_aligned:	mov	r4, r5, lspull #16
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		mov	r6, r6, lspull #16
		orr	r6, r6, r7, lspush #16
		mov	r7, r7, lspull #16
		orr	r7, r7, r8, lspush #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #16
		/*
		 * r4 holds two pending bytes; a third trailing byte,
		 * if any, has to be fetched separately.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit

		/*
		 * Source offset 3: shifts are 24 / 8.  As above, C is
		 * cleared again after the selecting 'cmp'.
		 */
.Lsrc3_aligned:	mov	r4, r5, lspull #24
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		mov	r6, r6, lspull #24
		orr	r6, r6, r7, lspush #8
		mov	r7, r7, lspull #24
		orr	r7, r7, r8, lspush #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #24
		/*
		 * r4 holds one pending byte; further trailing bytes
		 * come from one more loaded word.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, lspush #24
		mov	r5, r4, get_byte_1
		b	.Lexit
FN_EXIT