// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017-2019 Linaro Ltd <ard.biesheuvel@linaro.org>
 * Copyright 2025 Google LLC
 */

#include <crypto/aes.h>
#include <linux/cache.h>
#include <linux/crypto.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/unaligned.h>

static const u8 ____cacheline_aligned aes_sbox[] = {
	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};

static const u8 ____cacheline_aligned aes_inv_sbox[] = {
	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};

extern const u8 crypto_aes_sbox[256] __alias(aes_sbox);
extern const u8 crypto_aes_inv_sbox[256] __alias(aes_inv_sbox);

EXPORT_SYMBOL(crypto_aes_sbox);
EXPORT_SYMBOL(crypto_aes_inv_sbox);

/* aes_enc_tab[i] contains MixColumn([SubByte(i), 0, 0, 0]). */
const u32 ____cacheline_aligned aes_enc_tab[256] = {
	0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6,
	0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
	0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f,
	0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
	0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453,
	0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
	0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551,
	0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
	0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637,
	0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
	0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d,
	0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
	0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd,
	0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
	0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d,
	0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
	0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a,
	0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
	0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d,
	0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
	0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5,
	0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
	0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755,
	0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
	0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54,
	0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
	0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264,
	0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
	0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531,
	0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
	0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac,
	0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
	0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657,
	0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
	0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c,
	0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
	0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199,
	0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
	0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c,
	0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
	0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7,
	0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
	0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c,
};
EXPORT_SYMBOL(aes_enc_tab);
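
/*
 * Worked example of the table layout: aes_enc_tab[0x01] = 0x847c7cf8.
 * SubByte(0x01) = 0x7c, and MixColumn([0x7c, 0, 0, 0]) =
 * [2*0x7c, 0x7c, 0x7c, 3*0x7c] = [0xf8, 0x7c, 0x7c, 0x84] in GF(2^8),
 * packed here as a little-endian 32-bit word.
 */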

/* aes_dec_tab[i] contains InvMixColumn([InvSubByte(i), 0, 0, 0]). */
const u32 ____cacheline_aligned aes_dec_tab[256] = {
	0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f,
	0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5,
	0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25,
	0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
	0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458,
	0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927,
	0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5,
	0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
	0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72,
	0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3,
	0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7,
	0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
	0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040,
	0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d,
	0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6,
	0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
	0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32,
	0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36,
	0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793,
	0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
	0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2,
	0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3,
	0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb,
	0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
	0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc,
	0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947,
	0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9,
	0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
	0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890,
	0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf,
	0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e,
	0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
	0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a,
	0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733,
	0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43,
	0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
	0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92,
	0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb,
	0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255,
	0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
	0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc,
	0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664,
	0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0,
};
EXPORT_SYMBOL(aes_dec_tab);
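
/*
 * Worked example: aes_dec_tab[0x01] = 0x5365417e.  InvSubByte(0x01) = 0x09,
 * and InvMixColumn([0x09, 0, 0, 0]) = [0xe*0x09, 0x9*0x09, 0xd*0x09,
 * 0xb*0x09] = [0x7e, 0x41, 0x65, 0x53] in GF(2^8), packed little-endian.
 */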

/* Prefetch data into L1 cache.  @mem should be cacheline-aligned. */
static __always_inline void aes_prefetch(const void *mem, size_t len)
{
	for (size_t i = 0; i < len; i += L1_CACHE_BYTES)
		*(volatile const u8 *)(mem + i);
	barrier();
}

static u32 mul_by_x(u32 w)
{
	u32 x = w & 0x7f7f7f7f;
	u32 y = w & 0x80808080;

	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
	return (x << 1) ^ (y >> 7) * 0x1b;
}

static u32 mul_by_x2(u32 w)
{
	u32 x = w & 0x3f3f3f3f;
	u32 y = w & 0x80808080;
	u32 z = w & 0x40404040;

	/* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
	return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
}
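
/*
 * Both helpers act on all four bytes of the word at once, e.g.
 * mul_by_x(0x000000ff) == 0x000000e5 (0xff times x, reduced modulo
 * x^8 + x^4 + x^3 + x + 1) and mul_by_x2(0x000000ff) == 0x000000d1.
 */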

static u32 mix_columns(u32 x)
{
	/*
	 * Perform the following matrix multiplication in GF(2^8)
	 *
	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
	 */
	u32 y = mul_by_x(x) ^ ror32(x, 16);

	return y ^ ror32(x ^ y, 8);
}
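
/*
 * Worked example: the column bytes {0xdb, 0x13, 0x53, 0x45}, loaded
 * little-endian as x = 0x455313db, mix to 0xbca14d8e, i.e. the column
 * {0x8e, 0x4d, 0xa1, 0xbc}.
 */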

static u32 inv_mix_columns(u32 x)
{
	/*
	 * Perform the following matrix multiplication in GF(2^8)
	 *
	 * | 0xe 0xb 0xd 0x9 |   | x[0] |
	 * | 0x9 0xe 0xb 0xd |   | x[1] |
	 * | 0xd 0x9 0xe 0xb | x | x[2] |
	 * | 0xb 0xd 0x9 0xe |   | x[3] |
	 *
	 * which can conveniently be reduced to
	 *
	 * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
	 * | 0x1 0x2 0x3 0x1 |   | 0x0 0x5 0x0 0x4 |   | x[1] |
	 * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
	 * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
	 */
	u32 y = mul_by_x2(x);

	return mix_columns(x ^ y ^ ror32(y, 16));
}
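
/*
 * inv_mix_columns() undoes mix_columns(), so for the example above
 * inv_mix_columns(0xbca14d8e) == 0x455313db.
 */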

static u32 subw(u32 in)
{
	return (aes_sbox[in & 0xff]) ^
	       (aes_sbox[(in >>  8) & 0xff] <<  8) ^
	       (aes_sbox[(in >> 16) & 0xff] << 16) ^
	       (aes_sbox[(in >> 24) & 0xff] << 24);
}

static void aes_expandkey_generic(u32 rndkeys[], u32 *inv_rndkeys,
				  const u8 *in_key, int key_len)
{
	u32 kwords = key_len / sizeof(u32);
	u32 rc, i, j;

	for (i = 0; i < kwords; i++)
		rndkeys[i] = get_unaligned_le32(&in_key[i * sizeof(u32)]);

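	/*
	 * rc steps through the AES round constants 0x01, 0x02, 0x04, 0x08,
	 * 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36; each is the previous value
	 * times 'x' in GF(2^8), hence the mul_by_x() in the loop header.
	 */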
	for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
		u32 *rki = &rndkeys[i * kwords];
		u32 *rko = rki + kwords;

		rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
		rko[1] = rko[0] ^ rki[1];
		rko[2] = rko[1] ^ rki[2];
		rko[3] = rko[2] ^ rki[3];

		if (key_len == AES_KEYSIZE_192) {
			if (i >= 7)
				break;
			rko[4] = rko[3] ^ rki[4];
			rko[5] = rko[4] ^ rki[5];
		} else if (key_len == AES_KEYSIZE_256) {
			if (i >= 6)
				break;
			rko[4] = subw(rko[3]) ^ rki[4];
			rko[5] = rko[4] ^ rki[5];
			rko[6] = rko[5] ^ rki[6];
			rko[7] = rko[6] ^ rki[7];
		}
	}

	/*
	 * Generate the decryption keys for the Equivalent Inverse Cipher.
	 * This involves reversing the order of the round keys, and applying
	 * the Inverse Mix Columns transformation to all but the first and
	 * the last one.
	 */
	if (inv_rndkeys) {
		inv_rndkeys[0] = rndkeys[key_len + 24];
		inv_rndkeys[1] = rndkeys[key_len + 25];
		inv_rndkeys[2] = rndkeys[key_len + 26];
		inv_rndkeys[3] = rndkeys[key_len + 27];

		for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
			inv_rndkeys[i]     = inv_mix_columns(rndkeys[j]);
			inv_rndkeys[i + 1] = inv_mix_columns(rndkeys[j + 1]);
			inv_rndkeys[i + 2] = inv_mix_columns(rndkeys[j + 2]);
			inv_rndkeys[i + 3] = inv_mix_columns(rndkeys[j + 3]);
		}

		inv_rndkeys[i]     = rndkeys[0];
		inv_rndkeys[i + 1] = rndkeys[1];
		inv_rndkeys[i + 2] = rndkeys[2];
		inv_rndkeys[i + 3] = rndkeys[3];
	}
}

int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		  unsigned int key_len)
{
	if (aes_check_keylen(key_len) != 0)
		return -EINVAL;
	ctx->key_length = key_len;
	aes_expandkey_generic(ctx->key_enc, ctx->key_dec, in_key, key_len);
	return 0;
}
EXPORT_SYMBOL(aes_expandkey);

static __always_inline u32 enc_quarterround(const u32 w[4], int i, u32 rk)
{
	return rk ^ aes_enc_tab[(u8)w[i]] ^
	       rol32(aes_enc_tab[(u8)(w[(i + 1) % 4] >> 8)], 8) ^
	       rol32(aes_enc_tab[(u8)(w[(i + 2) % 4] >> 16)], 16) ^
	       rol32(aes_enc_tab[(u8)(w[(i + 3) % 4] >> 24)], 24);
}

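/*
 * Byte 1 of each aes_enc_tab[] entry is the plain S-box output (the entry's
 * bytes are {2*s, s, s, 3*s} with s = SubByte(i)), so the last round, which
 * skips MixColumns, can reuse aes_enc_tab by masking with 0x0000ff00 instead
 * of needing a separate cacheline-aligned copy of aes_sbox.
 */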
static __always_inline u32 enclast_quarterround(const u32 w[4], int i, u32 rk)
{
	return rk ^ ((aes_enc_tab[(u8)w[i]] & 0x0000ff00) >> 8) ^
	       (aes_enc_tab[(u8)(w[(i + 1) % 4] >> 8)] & 0x0000ff00) ^
	       ((aes_enc_tab[(u8)(w[(i + 2) % 4] >> 16)] & 0x0000ff00) << 8) ^
	       ((aes_enc_tab[(u8)(w[(i + 3) % 4] >> 24)] & 0x0000ff00) << 16);
}

static void __maybe_unused aes_encrypt_generic(const u32 rndkeys[], int nrounds,
					       u8 out[AES_BLOCK_SIZE],
					       const u8 in[AES_BLOCK_SIZE])
{
	const u32 *rkp = rndkeys;
	int n = nrounds - 1;
	u32 w[4];

	w[0] = get_unaligned_le32(&in[0]) ^ *rkp++;
	w[1] = get_unaligned_le32(&in[4]) ^ *rkp++;
	w[2] = get_unaligned_le32(&in[8]) ^ *rkp++;
	w[3] = get_unaligned_le32(&in[12]) ^ *rkp++;

	/*
	 * Prefetch the table before doing data and key-dependent loads from it.
	 *
	 * This is intended only as a basic constant-time hardening measure that
	 * avoids interfering with performance too much.  Its effectiveness is
	 * not guaranteed.  For proper constant-time AES, a CPU that supports
	 * AES instructions should be used instead.
	 */
	aes_prefetch(aes_enc_tab, sizeof(aes_enc_tab));

	do {
		u32 w0 = enc_quarterround(w, 0, *rkp++);
		u32 w1 = enc_quarterround(w, 1, *rkp++);
		u32 w2 = enc_quarterround(w, 2, *rkp++);
		u32 w3 = enc_quarterround(w, 3, *rkp++);

		w[0] = w0;
		w[1] = w1;
		w[2] = w2;
		w[3] = w3;
	} while (--n);

	put_unaligned_le32(enclast_quarterround(w, 0, *rkp++), &out[0]);
	put_unaligned_le32(enclast_quarterround(w, 1, *rkp++), &out[4]);
	put_unaligned_le32(enclast_quarterround(w, 2, *rkp++), &out[8]);
	put_unaligned_le32(enclast_quarterround(w, 3, *rkp++), &out[12]);
}
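
/*
 * Illustrative sketch only (not wired into the build or into any test
 * framework; the function name is hypothetical): shows how the generic key
 * schedule and block function fit together, using the well-known AES-128
 * key/plaintext/ciphertext example from FIPS-197 Appendix B.
 */
static bool __maybe_unused aes_encrypt_generic_example(void)
{
	static const u8 key[AES_KEYSIZE_128] = {
		0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
		0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
	};
	static const u8 ptext[AES_BLOCK_SIZE] = {
		0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d,
		0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37, 0x07, 0x34,
	};
	static const u8 ctext[AES_BLOCK_SIZE] = {
		0x39, 0x25, 0x84, 0x1d, 0x02, 0xdc, 0x09, 0xfb,
		0xdc, 0x11, 0x85, 0x97, 0x19, 0x6a, 0x0b, 0x32,
	};
	u32 rndkeys[AES_MAX_KEYLENGTH_U32];
	u8 out[AES_BLOCK_SIZE];
	int i;

	/* AES-128: 16-byte key, 10 rounds */
	aes_expandkey_generic(rndkeys, NULL, key, AES_KEYSIZE_128);
	aes_encrypt_generic(rndkeys, 10, out, ptext);

	for (i = 0; i < AES_BLOCK_SIZE; i++)
		if (out[i] != ctext[i])
			return false;
	return true;
}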

static __always_inline u32 dec_quarterround(const u32 w[4], int i, u32 rk)
{
	return rk ^ aes_dec_tab[(u8)w[i]] ^
	       rol32(aes_dec_tab[(u8)(w[(i + 3) % 4] >> 8)], 8) ^
	       rol32(aes_dec_tab[(u8)(w[(i + 2) % 4] >> 16)], 16) ^
	       rol32(aes_dec_tab[(u8)(w[(i + 1) % 4] >> 24)], 24);
}

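/*
 * The aes_dec_tab[] entries are {0xe*s, 0x9*s, 0xd*s, 0xb*s} with
 * s = InvSubByte(i), so no byte of an entry equals the plain inverse S-box
 * value; the last decryption round therefore looks up aes_inv_sbox directly.
 */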
static __always_inline u32 declast_quarterround(const u32 w[4], int i, u32 rk)
{
	return rk ^ aes_inv_sbox[(u8)w[i]] ^
	       ((u32)aes_inv_sbox[(u8)(w[(i + 3) % 4] >> 8)] << 8) ^
	       ((u32)aes_inv_sbox[(u8)(w[(i + 2) % 4] >> 16)] << 16) ^
	       ((u32)aes_inv_sbox[(u8)(w[(i + 1) % 4] >> 24)] << 24);
}

static void __maybe_unused aes_decrypt_generic(const u32 inv_rndkeys[],
					       int nrounds,
					       u8 out[AES_BLOCK_SIZE],
					       const u8 in[AES_BLOCK_SIZE])
{
	const u32 *rkp = inv_rndkeys;
	int n = nrounds - 1;
	u32 w[4];

	w[0] = get_unaligned_le32(&in[0]) ^ *rkp++;
	w[1] = get_unaligned_le32(&in[4]) ^ *rkp++;
	w[2] = get_unaligned_le32(&in[8]) ^ *rkp++;
	w[3] = get_unaligned_le32(&in[12]) ^ *rkp++;

	aes_prefetch(aes_dec_tab, sizeof(aes_dec_tab));

	do {
		u32 w0 = dec_quarterround(w, 0, *rkp++);
		u32 w1 = dec_quarterround(w, 1, *rkp++);
		u32 w2 = dec_quarterround(w, 2, *rkp++);
		u32 w3 = dec_quarterround(w, 3, *rkp++);

		w[0] = w0;
		w[1] = w1;
		w[2] = w2;
		w[3] = w3;
	} while (--n);

	aes_prefetch(aes_inv_sbox, sizeof(aes_inv_sbox));
	put_unaligned_le32(declast_quarterround(w, 0, *rkp++), &out[0]);
	put_unaligned_le32(declast_quarterround(w, 1, *rkp++), &out[4]);
	put_unaligned_le32(declast_quarterround(w, 2, *rkp++), &out[8]);
	put_unaligned_le32(declast_quarterround(w, 3, *rkp++), &out[12]);
}
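
/*
 * Illustrative sketch only, mirroring the encryption example above (again not
 * part of the build or of any test framework): decrypts the FIPS-197
 * Appendix B ciphertext back to its plaintext via the inverse key schedule.
 */
static bool __maybe_unused aes_decrypt_generic_example(void)
{
	static const u8 key[AES_KEYSIZE_128] = {
		0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
		0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
	};
	static const u8 ctext[AES_BLOCK_SIZE] = {
		0x39, 0x25, 0x84, 0x1d, 0x02, 0xdc, 0x09, 0xfb,
		0xdc, 0x11, 0x85, 0x97, 0x19, 0x6a, 0x0b, 0x32,
	};
	static const u8 ptext[AES_BLOCK_SIZE] = {
		0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d,
		0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37, 0x07, 0x34,
	};
	u32 rndkeys[AES_MAX_KEYLENGTH_U32];
	u32 inv_rndkeys[AES_MAX_KEYLENGTH_U32];
	u8 out[AES_BLOCK_SIZE];
	int i;

	aes_expandkey_generic(rndkeys, inv_rndkeys, key, AES_KEYSIZE_128);
	aes_decrypt_generic(inv_rndkeys, 10, out, ctext);

	for (i = 0; i < AES_BLOCK_SIZE; i++)
		if (out[i] != ptext[i])
			return false;
	return true;
}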

/*
 * Note: the aes_prepare*key_* names reflect the fact that the implementation
 * might not actually expand the key.  (The s390 code for example doesn't.)
 * Where the key is expanded we use the more specific names aes_expandkey_*.
 *
 * aes_preparekey_arch() is passed an optional pointer 'inv_k' which points to
 * the area to store the prepared decryption key.  It will be NULL if the user
 * is requesting encryption-only.  aes_preparekey_arch() is also passed a valid
 * 'key_len' and 'nrounds', corresponding to AES-128, AES-192, or AES-256.
 */
#ifdef CONFIG_CRYPTO_LIB_AES_ARCH
/* An arch-specific implementation of AES is available.  Include it. */
#include "aes.h" /* $(SRCARCH)/aes.h */
#else
/* No arch-specific implementation of AES is available.  Use generic code. */

static void aes_preparekey_arch(union aes_enckey_arch *k,
				union aes_invkey_arch *inv_k,
				const u8 *in_key, int key_len, int nrounds)
{
	aes_expandkey_generic(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
			      in_key, key_len);
}

static void aes_encrypt_arch(const struct aes_enckey *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	aes_encrypt_generic(key->k.rndkeys, key->nrounds, out, in);
}

static void aes_decrypt_arch(const struct aes_key *key,
			     u8 out[AES_BLOCK_SIZE],
			     const u8 in[AES_BLOCK_SIZE])
{
	aes_decrypt_generic(key->inv_k.inv_rndkeys, key->nrounds, out, in);
}
#endif

static int __aes_preparekey(struct aes_enckey *enc_key,
			    union aes_invkey_arch *inv_k,
			    const u8 *in_key, size_t key_len)
{
	if (aes_check_keylen(key_len) != 0)
		return -EINVAL;
	enc_key->len = key_len;
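	/* 10 rounds for AES-128, 12 for AES-192, 14 for AES-256 */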
	enc_key->nrounds = 6 + key_len / 4;
	aes_preparekey_arch(&enc_key->k, inv_k, in_key, key_len,
			    enc_key->nrounds);
	return 0;
}

int aes_preparekey(struct aes_key *key, const u8 *in_key, size_t key_len)
{
	return __aes_preparekey((struct aes_enckey *)key, &key->inv_k,
				in_key, key_len);
}
EXPORT_SYMBOL(aes_preparekey);

int aes_prepareenckey(struct aes_enckey *key, const u8 *in_key, size_t key_len)
{
	return __aes_preparekey(key, NULL, in_key, key_len);
}
EXPORT_SYMBOL(aes_prepareenckey);

void aes_encrypt(aes_encrypt_arg key, u8 out[AES_BLOCK_SIZE],
		 const u8 in[AES_BLOCK_SIZE])
{
	aes_encrypt_arch(key.enc_key, out, in);
}
EXPORT_SYMBOL(aes_encrypt);

void aes_decrypt(const struct aes_key *key, u8 out[AES_BLOCK_SIZE],
		 const u8 in[AES_BLOCK_SIZE])
{
	aes_decrypt_arch(key, out, in);
}
EXPORT_SYMBOL(aes_decrypt);
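
/*
 * Minimal usage sketch (comment only; it assumes aes_encrypt_arg transparently
 * accepts a struct aes_key pointer, as the .enc_key access above suggests; see
 * <crypto/aes.h> for the authoritative API):
 *
 *	struct aes_key key;
 *	u8 block[AES_BLOCK_SIZE];
 *
 *	if (aes_preparekey(&key, raw_key, raw_key_len))
 *		return -EINVAL;
 *	aes_encrypt(&key, block, block);
 *	aes_decrypt(&key, block, block);
 *
 * Callers that only ever encrypt can use struct aes_enckey with
 * aes_prepareenckey() and skip storing the inverse key schedule.
 */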

#ifdef aes_mod_init_arch
static int __init aes_mod_init(void)
{
	aes_mod_init_arch();
	return 0;
}
subsys_initcall(aes_mod_init);

static void __exit aes_mod_exit(void)
{
}
module_exit(aes_mod_exit);
#endif

MODULE_DESCRIPTION("AES block cipher");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_AUTHOR("Eric Biggers <ebiggers@kernel.org>");
MODULE_LICENSE("GPL v2");