// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017-2019 Linaro Ltd <ard.biesheuvel@linaro.org>
 * Copyright 2025 Google LLC
 */

#include <crypto/aes.h>
#include <linux/cache.h>
#include <linux/crypto.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/unaligned.h>

/*
 * Emit the sbox as volatile const to prevent the compiler from doing
 * constant folding on sbox references involving fixed indexes.
 */
static volatile const u8 ____cacheline_aligned aes_sbox[] = {
	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};

static volatile const u8 ____cacheline_aligned aes_inv_sbox[] = {
	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};

extern const u8 crypto_aes_sbox[256] __alias(aes_sbox);
extern const u8 crypto_aes_inv_sbox[256] __alias(aes_inv_sbox);

EXPORT_SYMBOL(crypto_aes_sbox);
EXPORT_SYMBOL(crypto_aes_inv_sbox);
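
/*
 * Because the sbox arrays are volatile, an access such as aes_sbox[0] has to
 * be loaded at run time instead of being folded to the constant 0x63.
 * Without that, the cache-preloading XORs in aes_encrypt_old() and
 * aes_decrypt_old() below could be optimized away entirely.  The __alias()
 * declarations export the same tables under the non-volatile
 * crypto_aes_sbox / crypto_aes_inv_sbox names.
 */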

/* aes_enc_tab[i] contains MixColumn([SubByte(i), 0, 0, 0]). */
const u32 ____cacheline_aligned aes_enc_tab[256] = {
	0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6,
	0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
	0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f,
	0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
	0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453,
	0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
	0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551,
	0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
	0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637,
	0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
	0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d,
	0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
	0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd,
	0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
	0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d,
	0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
	0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a,
	0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
	0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d,
	0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
	0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5,
	0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
	0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755,
	0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
	0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54,
	0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
	0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264,
	0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
	0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531,
	0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
	0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac,
	0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
	0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657,
	0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
	0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c,
	0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
	0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199,
	0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
	0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c,
	0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
	0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7,
	0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
	0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c,
};
EXPORT_SYMBOL(aes_enc_tab);
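
/*
 * Worked example of the table definition above: SubByte(0x01) = 0x7c, and
 * MixColumn([0x7c, 0, 0, 0]) is [2*0x7c, 1*0x7c, 1*0x7c, 3*0x7c] =
 * [0xf8, 0x7c, 0x7c, 0x84] in GF(2^8).  Packed as a little-endian word (first
 * column byte in the low byte) this is 0x847c7cf8, which is aes_enc_tab[1]
 * above.  The other three columns of the MixColumns matrix are obtained at
 * run time by rotating this word; see enc_quarterround() below.
 */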

/* aes_dec_tab[i] contains InvMixColumn([InvSubByte(i), 0, 0, 0]). */
const u32 ____cacheline_aligned aes_dec_tab[256] = {
	0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f,
	0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5,
	0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25,
	0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
	0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458,
	0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927,
	0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5,
	0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
	0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72,
	0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3,
	0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7,
	0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
	0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040,
	0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d,
	0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6,
	0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
	0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32,
	0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36,
	0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793,
	0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
	0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2,
	0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3,
	0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb,
	0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
	0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc,
	0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947,
	0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9,
	0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
	0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890,
	0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf,
	0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e,
	0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
	0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a,
	0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733,
	0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43,
	0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
	0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92,
	0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb,
	0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255,
	0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
	0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc,
	0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664,
	0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0,
};
EXPORT_SYMBOL(aes_dec_tab);
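
/*
 * Worked example of the table definition above: InvSubByte(0x00) = 0x52, and
 * InvMixColumn([0x52, 0, 0, 0]) is [0xe*0x52, 0x9*0x52, 0xd*0x52, 0xb*0x52] =
 * [0x51, 0xf4, 0xa7, 0x50] in GF(2^8), i.e. 0x50a7f451 as a little-endian
 * word, which is aes_dec_tab[0] above.
 */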

/* Prefetch data into L1 cache.  @mem should be cacheline-aligned. */
static __always_inline void aes_prefetch(const void *mem, size_t len)
{
	for (size_t i = 0; i < len; i += L1_CACHE_BYTES)
		*(volatile const u8 *)(mem + i);
	barrier();
}
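
/*
 * The loads above go through a volatile pointer so the compiler cannot elide
 * them as unused, and barrier() is presumably there so the compiler does not
 * defer them past the data-dependent table lookups that follow.
 */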

static u32 mul_by_x(u32 w)
{
	u32 x = w & 0x7f7f7f7f;
	u32 y = w & 0x80808080;

	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
	return (x << 1) ^ (y >> 7) * 0x1b;
}

static u32 mul_by_x2(u32 w)
{
	u32 x = w & 0x3f3f3f3f;
	u32 y = w & 0x80808080;
	u32 z = w & 0x40404040;

	/* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
	return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
}
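
/*
 * Example of the packed GF(2^8) arithmetic above: for the single byte 0x80,
 * mul_by_x(0x00000080) = 0x0000001b (0x80 << 1 overflows, so the result is
 * reduced by the AES polynomial 0x11b), and mul_by_x2(0x00000080) =
 * 0x00000036, which indeed equals mul_by_x(mul_by_x(0x00000080)).  Both
 * helpers operate on all four bytes of the word independently (SWAR style),
 * so one call multiplies a whole column at once.
 */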

static u32 mix_columns(u32 x)
{
	/*
	 * Perform the following matrix multiplication in GF(2^8)
	 *
	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
	 */
	u32 y = mul_by_x(x) ^ ror32(x, 16);

	return y ^ ror32(x ^ y, 8);
}
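
/*
 * Why the two-line implementation above matches the matrix: with column bytes
 * b0..b3 packed little-endian, byte i of ror32(x, 16) is b[(i+2)%4], so byte i
 * of y is 2*b[i] ^ b[(i+2)%4].  Byte i of ror32(x ^ y, 8) is then
 * 3*b[(i+1)%4] ^ b[(i+3)%4], and XOR-ing the two gives
 * 2*b[i] ^ 3*b[(i+1)%4] ^ b[(i+2)%4] ^ b[(i+3)%4], i.e. one row of the
 * circulant matrix in the comment.
 */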

static u32 inv_mix_columns(u32 x)
{
	/*
	 * Perform the following matrix multiplication in GF(2^8)
	 *
	 * | 0xe 0xb 0xd 0x9 |   | x[0] |
	 * | 0x9 0xe 0xb 0xd |   | x[1] |
	 * | 0xd 0x9 0xe 0xb | x | x[2] |
	 * | 0xb 0xd 0x9 0xe |   | x[3] |
	 *
	 * which can conveniently be reduced to
	 *
	 * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
	 * | 0x1 0x2 0x3 0x1 |   | 0x0 0x5 0x0 0x4 |   | x[1] |
	 * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
	 * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
	 */
	u32 y = mul_by_x2(x);

	return mix_columns(x ^ y ^ ror32(y, 16));
}
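
/*
 * In the code above, y = mul_by_x2(x) multiplies every byte by 0x4, so byte i
 * of x ^ y ^ ror32(y, 16) is 0x5*b[i] ^ 0x4*b[(i+2)%4], which is exactly the
 * second matrix of the factorization in the comment.  Feeding that through
 * mix_columns() then applies the first matrix, giving InvMixColumns.
 */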

static __always_inline u32 subshift(u32 in[], int pos)
{
	return (aes_sbox[in[pos] & 0xff]) ^
	       (aes_sbox[(in[(pos + 1) % 4] >>  8) & 0xff] <<  8) ^
	       (aes_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
	       (aes_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
}

static __always_inline u32 inv_subshift(u32 in[], int pos)
{
	return (aes_inv_sbox[in[pos] & 0xff]) ^
	       (aes_inv_sbox[(in[(pos + 3) % 4] >>  8) & 0xff] <<  8) ^
	       (aes_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
	       (aes_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
}

static u32 subw(u32 in)
{
	return (aes_sbox[in & 0xff]) ^
	       (aes_sbox[(in >>  8) & 0xff] <<  8) ^
	       (aes_sbox[(in >> 16) & 0xff] << 16) ^
	       (aes_sbox[(in >> 24) & 0xff] << 24);
}
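
/*
 * subshift() combines SubBytes and ShiftRows for one output column: with the
 * state stored as four little-endian column words, the byte that ShiftRows
 * moves into row r of column pos comes from column (pos + r) % 4, which is
 * where the (pos + 1) % 4 etc. indexing above comes from.  inv_subshift()
 * uses (pos + 4 - r) % 4, written as (pos + 3) % 4 and so on, for
 * InvShiftRows.  subw() is the plain SubWord() operation used by the key
 * schedule.
 */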

static void aes_expandkey_generic(u32 rndkeys[], u32 *inv_rndkeys,
				  const u8 *in_key, int key_len)
{
	u32 kwords = key_len / sizeof(u32);
	u32 rc, i, j;

	for (i = 0; i < kwords; i++)
		rndkeys[i] = get_unaligned_le32(&in_key[i * sizeof(u32)]);

	for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
		u32 *rki = &rndkeys[i * kwords];
		u32 *rko = rki + kwords;

		rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
		rko[1] = rko[0] ^ rki[1];
		rko[2] = rko[1] ^ rki[2];
		rko[3] = rko[2] ^ rki[3];

		if (key_len == AES_KEYSIZE_192) {
			if (i >= 7)
				break;
			rko[4] = rko[3] ^ rki[4];
			rko[5] = rko[4] ^ rki[5];
		} else if (key_len == AES_KEYSIZE_256) {
			if (i >= 6)
				break;
			rko[4] = subw(rko[3]) ^ rki[4];
			rko[5] = rko[4] ^ rki[5];
			rko[6] = rko[5] ^ rki[6];
			rko[7] = rko[6] ^ rki[7];
		}
	}

	/*
	 * Generate the decryption keys for the Equivalent Inverse Cipher.
	 * This involves reversing the order of the round keys, and applying
	 * the Inverse Mix Columns transformation to all but the first and
	 * the last one.
	 */
	if (inv_rndkeys) {
		inv_rndkeys[0] = rndkeys[key_len + 24];
		inv_rndkeys[1] = rndkeys[key_len + 25];
		inv_rndkeys[2] = rndkeys[key_len + 26];
		inv_rndkeys[3] = rndkeys[key_len + 27];

		for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
			inv_rndkeys[i]     = inv_mix_columns(rndkeys[j]);
			inv_rndkeys[i + 1] = inv_mix_columns(rndkeys[j + 1]);
			inv_rndkeys[i + 2] = inv_mix_columns(rndkeys[j + 2]);
			inv_rndkeys[i + 3] = inv_mix_columns(rndkeys[j + 3]);
		}

		inv_rndkeys[i]     = rndkeys[0];
		inv_rndkeys[i + 1] = rndkeys[1];
		inv_rndkeys[i + 2] = rndkeys[2];
		inv_rndkeys[i + 3] = rndkeys[3];
	}
}
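
/*
 * For reference, the sizes produced above: AES-128/-192/-256 expand to
 * 4 * (nrounds + 1) = 44/52/60 words in rndkeys[], with nrounds = 10/12/14.
 * The rc variable steps through the round constants 0x01, 0x02, 0x04, ...,
 * 0x80, 0x1b, 0x36 by repeated mul_by_x(), matching the RCON sequence from
 * FIPS 197.
 */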

int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		  unsigned int key_len)
{
	if (aes_check_keylen(key_len) != 0)
		return -EINVAL;
	ctx->key_length = key_len;
	aes_expandkey_generic(ctx->key_enc, ctx->key_dec, in_key, key_len);
	return 0;
}
EXPORT_SYMBOL(aes_expandkey);

void aes_encrypt_old(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
{
	const u32 *rkp = ctx->key_enc + 4;
	int rounds = 6 + ctx->key_length / 4;
	u32 st0[4], st1[4];
	int round;

	st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
	st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
	st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
	st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);

	/*
	 * Force the compiler to emit data independent Sbox references,
	 * by xoring the input with Sbox values that are known to add up
	 * to zero. This pulls the entire Sbox into the D-cache before any
	 * data dependent lookups are done.
	 */
	st0[0] ^= aes_sbox[ 0] ^ aes_sbox[ 64] ^ aes_sbox[134] ^ aes_sbox[195];
	st0[1] ^= aes_sbox[16] ^ aes_sbox[ 82] ^ aes_sbox[158] ^ aes_sbox[221];
	st0[2] ^= aes_sbox[32] ^ aes_sbox[ 96] ^ aes_sbox[160] ^ aes_sbox[234];
	st0[3] ^= aes_sbox[48] ^ aes_sbox[112] ^ aes_sbox[186] ^ aes_sbox[241];
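	/*
	 * For the first line above, aes_sbox[0] ^ aes_sbox[64] ^ aes_sbox[134]
	 * ^ aes_sbox[195] = 0x63 ^ 0x09 ^ 0x44 ^ 0x2e = 0, so the state is
	 * unchanged; the remaining index sets appear to be chosen the same
	 * way, spread so that the four lines together touch the whole
	 * 256-byte table.
	 */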

	for (round = 0;; round += 2, rkp += 8) {
		st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
		st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
		st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
		st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];

		if (round == rounds - 2)
			break;

		st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
		st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
		st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
		st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
	}

	put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
	put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
	put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
	put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
}
EXPORT_SYMBOL(aes_encrypt_old);

static __always_inline u32 enc_quarterround(const u32 w[4], int i, u32 rk)
{
	return rk ^ aes_enc_tab[(u8)w[i]] ^
	       rol32(aes_enc_tab[(u8)(w[(i + 1) % 4] >> 8)], 8) ^
	       rol32(aes_enc_tab[(u8)(w[(i + 2) % 4] >> 16)], 16) ^
	       rol32(aes_enc_tab[(u8)(w[(i + 3) % 4] >> 24)], 24);
}

static __always_inline u32 enclast_quarterround(const u32 w[4], int i, u32 rk)
{
	return rk ^ ((aes_enc_tab[(u8)w[i]] & 0x0000ff00) >> 8) ^
	       (aes_enc_tab[(u8)(w[(i + 1) % 4] >> 8)] & 0x0000ff00) ^
	       ((aes_enc_tab[(u8)(w[(i + 2) % 4] >> 16)] & 0x0000ff00) << 8) ^
	       ((aes_enc_tab[(u8)(w[(i + 3) % 4] >> 24)] & 0x0000ff00) << 16);
}
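
/*
 * One round of the encryption data path in table form: each aes_enc_tab[]
 * entry already holds SubBytes followed by MixColumns for a byte placed in
 * the first row, and rotating that word by 8/16/24 bits yields the entries
 * for the other three rows.  So the four lookups in enc_quarterround() above
 * compute SubBytes + ShiftRows + MixColumns for one output column with four
 * loads and four XORs, plus the AddRoundKey XOR with rk.
 * enclast_quarterround() instead extracts just the sbox byte (bits 8..15 of
 * the table entry) because the final round omits MixColumns.
 */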

static void __maybe_unused aes_encrypt_generic(const u32 rndkeys[], int nrounds,
					       u8 out[AES_BLOCK_SIZE],
					       const u8 in[AES_BLOCK_SIZE])
{
	const u32 *rkp = rndkeys;
	int n = nrounds - 1;
	u32 w[4];

	w[0] = get_unaligned_le32(&in[0]) ^ *rkp++;
	w[1] = get_unaligned_le32(&in[4]) ^ *rkp++;
	w[2] = get_unaligned_le32(&in[8]) ^ *rkp++;
	w[3] = get_unaligned_le32(&in[12]) ^ *rkp++;

	/*
	 * Prefetch the table before doing data and key-dependent loads from it.
	 *
	 * This is intended only as a basic constant-time hardening measure that
	 * avoids interfering with performance too much.  Its effectiveness is
	 * not guaranteed.  For proper constant-time AES, a CPU that supports
	 * AES instructions should be used instead.
	 */
	aes_prefetch(aes_enc_tab, sizeof(aes_enc_tab));

	do {
		u32 w0 = enc_quarterround(w, 0, *rkp++);
		u32 w1 = enc_quarterround(w, 1, *rkp++);
		u32 w2 = enc_quarterround(w, 2, *rkp++);
		u32 w3 = enc_quarterround(w, 3, *rkp++);

		w[0] = w0;
		w[1] = w1;
		w[2] = w2;
		w[3] = w3;
	} while (--n);

	put_unaligned_le32(enclast_quarterround(w, 0, *rkp++), &out[0]);
	put_unaligned_le32(enclast_quarterround(w, 1, *rkp++), &out[4]);
	put_unaligned_le32(enclast_quarterround(w, 2, *rkp++), &out[8]);
	put_unaligned_le32(enclast_quarterround(w, 3, *rkp++), &out[12]);
}

static __always_inline u32 dec_quarterround(const u32 w[4], int i, u32 rk)
{
	return rk ^ aes_dec_tab[(u8)w[i]] ^
	       rol32(aes_dec_tab[(u8)(w[(i + 3) % 4] >> 8)], 8) ^
	       rol32(aes_dec_tab[(u8)(w[(i + 2) % 4] >> 16)], 16) ^
	       rol32(aes_dec_tab[(u8)(w[(i + 1) % 4] >> 24)], 24);
}

static __always_inline u32 declast_quarterround(const u32 w[4], int i, u32 rk)
{
	return rk ^ aes_inv_sbox[(u8)w[i]] ^
	       ((u32)aes_inv_sbox[(u8)(w[(i + 3) % 4] >> 8)] << 8) ^
	       ((u32)aes_inv_sbox[(u8)(w[(i + 2) % 4] >> 16)] << 16) ^
	       ((u32)aes_inv_sbox[(u8)(w[(i + 1) % 4] >> 24)] << 24);
}

static void __maybe_unused aes_decrypt_generic(const u32 inv_rndkeys[],
					       int nrounds,
					       u8 out[AES_BLOCK_SIZE],
					       const u8 in[AES_BLOCK_SIZE])
{
	const u32 *rkp = inv_rndkeys;
	int n = nrounds - 1;
	u32 w[4];

	w[0] = get_unaligned_le32(&in[0]) ^ *rkp++;
	w[1] = get_unaligned_le32(&in[4]) ^ *rkp++;
	w[2] = get_unaligned_le32(&in[8]) ^ *rkp++;
	w[3] = get_unaligned_le32(&in[12]) ^ *rkp++;

	aes_prefetch(aes_dec_tab, sizeof(aes_dec_tab));

	do {
		u32 w0 = dec_quarterround(w, 0, *rkp++);
		u32 w1 = dec_quarterround(w, 1, *rkp++);
		u32 w2 = dec_quarterround(w, 2, *rkp++);
		u32 w3 = dec_quarterround(w, 3, *rkp++);

		w[0] = w0;
		w[1] = w1;
		w[2] = w2;
		w[3] = w3;
	} while (--n);

	aes_prefetch((const void *)aes_inv_sbox, sizeof(aes_inv_sbox));
	put_unaligned_le32(declast_quarterround(w, 0, *rkp++), &out[0]);
	put_unaligned_le32(declast_quarterround(w, 1, *rkp++), &out[4]);
	put_unaligned_le32(declast_quarterround(w, 2, *rkp++), &out[8]);
	put_unaligned_le32(declast_quarterround(w, 3, *rkp++), &out[12]);
}

void aes_decrypt_old(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
{
	const u32 *rkp = ctx->key_dec + 4;
	int rounds = 6 + ctx->key_length / 4;
	u32 st0[4], st1[4];
	int round;

	st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
	st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
	st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
	st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);

	/*
	 * Force the compiler to emit data independent Sbox references,
	 * by xoring the input with Sbox values that are known to add up
	 * to zero. This pulls the entire Sbox into the D-cache before any
	 * data dependent lookups are done.
	 */
	st0[0] ^= aes_inv_sbox[ 0] ^ aes_inv_sbox[ 64] ^ aes_inv_sbox[129] ^ aes_inv_sbox[200];
	st0[1] ^= aes_inv_sbox[16] ^ aes_inv_sbox[ 83] ^ aes_inv_sbox[150] ^ aes_inv_sbox[212];
	st0[2] ^= aes_inv_sbox[32] ^ aes_inv_sbox[ 96] ^ aes_inv_sbox[160] ^ aes_inv_sbox[236];
	st0[3] ^= aes_inv_sbox[48] ^ aes_inv_sbox[112] ^ aes_inv_sbox[187] ^ aes_inv_sbox[247];

	for (round = 0;; round += 2, rkp += 8) {
		st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
		st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
		st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
		st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];

		if (round == rounds - 2)
			break;

		st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
		st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
		st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
		st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
	}

	put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
	put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
	put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
	put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
}
EXPORT_SYMBOL(aes_decrypt_old);
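
/*
 * Minimal usage sketch of the crypto_aes_ctx interface implemented above
 * (variable names are illustrative only):
 *
 *	struct crypto_aes_ctx ctx;
 *	u8 pt[AES_BLOCK_SIZE], ct[AES_BLOCK_SIZE];
 *
 *	if (aes_expandkey(&ctx, raw_key, AES_KEYSIZE_128))
 *		return -EINVAL;
 *	aes_encrypt_old(&ctx, ct, pt);
 *	aes_decrypt_old(&ctx, pt, ct);
 *	memzero_explicit(&ctx, sizeof(ctx));
 */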

/*
 * Note: the aes_prepare*key_* names reflect the fact that the implementation
 * might not actually expand the key.  (The s390 code for example doesn't.)
 * Where the key is expanded we use the more specific names aes_expandkey_*.
 *
 * aes_preparekey_arch() is passed an optional pointer 'inv_k' which points to
 * the area to store the prepared decryption key.  It will be NULL if the user
 * is requesting encryption-only.  aes_preparekey_arch() is also passed a valid
 * 'key_len' and 'nrounds', corresponding to AES-128, AES-192, or AES-256.
 */
#ifdef CONFIG_CRYPTO_LIB_AES_ARCH
/* An arch-specific implementation of AES is available.  Include it. */
#include "aes.h" /* $(SRCARCH)/aes.h */
#else
/* No arch-specific implementation of AES is available.  Use generic code. */

static void aes_preparekey_arch(union aes_enckey_arch *k,
				union aes_invkey_arch *inv_k,
				const u8 *in_key, int key_len, int nrounds)
{
	aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
			      in_key, key_len);
}

static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	aes_encrypt_generic(key->k.rndkeys, key->nrounds, out, in);
}

static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	aes_decrypt_generic(key->inv_k.inv_rndkeys, key->nrounds, out, in);
}
#endif

static int __aes_preparekey(struct aes_enckey *enc_key,
			    union aes_invkey_arch *inv_k,
			    const u8 *in_key, size_t key_len)
{
	if (aes_check_keylen(key_len) != 0)
		return -EINVAL;
	enc_key->len = key_len;
	enc_key->nrounds = 6 + key_len / 4;
	aes_preparekey_arch(&enc_key->k, inv_k, in_key, key_len,
			    enc_key->nrounds);
	return 0;
}

int aes_preparekey(struct aes_key *key, const u8 *in_key, size_t key_len)
{
	return __aes_preparekey((struct aes_enckey *)key, &key->inv_k,
				in_key, key_len);
}
EXPORT_SYMBOL(aes_preparekey);

int aes_prepareenckey(struct aes_enckey *key, const u8 *in_key, size_t key_len)
{
	return __aes_preparekey(key, NULL, in_key, key_len);
}
EXPORT_SYMBOL(aes_prepareenckey);

void aes_encrypt_new(aes_encrypt_arg key, u8 out[AES_BLOCK_SIZE],
		     const u8 in[AES_BLOCK_SIZE])
{
	aes_encrypt_arch(key.enc_key, out, in);
}
EXPORT_SYMBOL(aes_encrypt_new);

void aes_decrypt_new(const struct aes_key *key, u8 out[AES_BLOCK_SIZE],
		     const u8 in[AES_BLOCK_SIZE])
{
	aes_decrypt_arch(key, out, in);
}
EXPORT_SYMBOL(aes_decrypt_new);
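
/*
 * Usage sketch for the prepared-key interface above (illustrative only; the
 * *_new symbols are presumably reached through wrappers declared in
 * <crypto/aes.h> rather than called directly):
 *
 *	struct aes_key key;
 *	u8 pt[AES_BLOCK_SIZE], ct[AES_BLOCK_SIZE];
 *
 *	if (aes_preparekey(&key, raw_key, AES_KEYSIZE_256))
 *		return -EINVAL;
 *	aes_decrypt_new(&key, pt, ct);
 *
 * aes_prepareenckey() plus aes_encrypt_new() cover the encryption-only case,
 * so callers that never decrypt do not have to store the inverse round keys.
 */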

#ifdef aes_mod_init_arch
static int __init aes_mod_init(void)
{
	aes_mod_init_arch();
	return 0;
}
subsys_initcall(aes_mod_init);

static void __exit aes_mod_exit(void)
{
}
module_exit(aes_mod_exit);
#endif

MODULE_DESCRIPTION("AES block cipher");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_AUTHOR("Eric Biggers <ebiggers@kernel.org>");
MODULE_LICENSE("GPL v2");