arch/x86/crypto/cast5_avx_glue.c (GNU Linux-libre 4.19.264-gnu1)
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>

#define CAST5_PARALLEL_BLOCKS 16

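/*
 * AVX assembler routines: each call processes CAST5_PARALLEL_BLOCKS (16)
 * blocks of CAST5_BLOCK_SIZE (8) bytes at once.  The CTR variant is expected
 * by its caller to advance the big-endian counter at *iv for the blocks it
 * consumes.
 */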
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
                                __be64 *iv);

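/* Key expansion is shared with the generic C implementation of CAST5. */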
static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
                                 unsigned int keylen)
{
        return cast5_setkey(&tfm->base, key, keylen);
}

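/*
 * FPU section management via glue_helper: the SIMD registers are only
 * claimed once at least a full 16-block batch is available in the walk,
 * and are released again when the whole request has been processed.
 */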
static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
                                   unsigned int nbytes)
{
        return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
                              walk, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
        return glue_fpu_end(fpu_enabled);
}

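/*
 * ECB encryption/decryption.  Full 16-block batches are handed to the AVX
 * routines; any remaining whole blocks fall back to the generic one-block
 * functions.
 */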
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
        bool fpu_enabled = false;
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes;
        void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
        int err;

        err = skcipher_walk_virt(&walk, req, false);

        while ((nbytes = walk.nbytes)) {
                u8 *wsrc = walk.src.virt.addr;
                u8 *wdst = walk.dst.virt.addr;

                fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);

                /* Process multi-block batch */
                if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                        fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
                        do {
                                fn(ctx, wdst, wsrc);

                                wsrc += bsize * CAST5_PARALLEL_BLOCKS;
                                wdst += bsize * CAST5_PARALLEL_BLOCKS;
                                nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                        } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                        if (nbytes < bsize)
                                goto done;
                }

                fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

                /* Handle leftovers */
                do {
                        fn(ctx, wdst, wsrc);

                        wsrc += bsize;
                        wdst += bsize;
                        nbytes -= bsize;
                } while (nbytes >= bsize);

done:
                err = skcipher_walk_done(&walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

static int ecb_encrypt(struct skcipher_request *req)
{
        return ecb_crypt(req, true);
}

static int ecb_decrypt(struct skcipher_request *req)
{
        return ecb_crypt(req, false);
}

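/*
 * CBC encryption is inherently sequential (each block is chained to the
 * previous ciphertext block), so it is done one block at a time with the
 * generic cipher and without touching the FPU.
 */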
static int cbc_encrypt(struct skcipher_request *req)
{
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        unsigned int nbytes;
        int err;

        err = skcipher_walk_virt(&walk, req, false);

        while ((nbytes = walk.nbytes)) {
                u64 *src = (u64 *)walk.src.virt.addr;
                u64 *dst = (u64 *)walk.dst.virt.addr;
                u64 *iv = (u64 *)walk.iv;

                do {
                        *dst = *src ^ *iv;
                        __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
                        iv = dst;
                        src++;
                        dst++;
                        nbytes -= bsize;
                } while (nbytes >= bsize);

                *(u64 *)walk.iv = *iv;
                err = skcipher_walk_done(&walk, nbytes);
        }

        return err;
}

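/*
 * CBC decryption walks the data backwards, from the last block towards the
 * first, so each decrypted block can be XORed in place with the ciphertext
 * block that precedes it.  16-block batches use the AVX routine, the rest
 * the generic cipher.  Returns the number of bytes left unprocessed.
 */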
static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
                                  struct skcipher_walk *walk)
{
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 last_iv;

        /* Start of the last block. */
        src += nbytes / bsize - 1;
        dst += nbytes / bsize - 1;

        last_iv = *src;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
                        src -= CAST5_PARALLEL_BLOCKS - 1;
                        dst -= CAST5_PARALLEL_BLOCKS - 1;

                        cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

                        nbytes -= bsize;
                        if (nbytes < bsize)
                                goto done;

                        *dst ^= *(src - 1);
                        src -= 1;
                        dst -= 1;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
        }

        /* Handle leftovers */
        for (;;) {
                __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

                nbytes -= bsize;
                if (nbytes < bsize)
                        break;

                *dst ^= *(src - 1);
                src -= 1;
                dst -= 1;
        }

done:
        *dst ^= *(u64 *)walk->iv;
        *(u64 *)walk->iv = last_iv;

        return nbytes;
}

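/*
 * Driver for CBC decryption: the FPU is claimed once enough data is queued
 * and kept until the whole request has been handled.
 */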
static int cbc_decrypt(struct skcipher_request *req)
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
        bool fpu_enabled = false;
        struct skcipher_walk walk;
        unsigned int nbytes;
        int err;

        err = skcipher_walk_virt(&walk, req, false);

        while ((nbytes = walk.nbytes)) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
                nbytes = __cbc_decrypt(ctx, &walk);
                err = skcipher_walk_done(&walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

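/*
 * Final, partial CTR block: encrypt the counter once, XOR the resulting
 * keystream into the remaining bytes, then advance the counter.
 */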
static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
{
        u8 *ctrblk = walk->iv;
        u8 keystream[CAST5_BLOCK_SIZE];
        u8 *src = walk->src.virt.addr;
        u8 *dst = walk->dst.virt.addr;
        unsigned int nbytes = walk->nbytes;

        __cast5_encrypt(ctx, keystream, ctrblk);
        crypto_xor_cpy(dst, keystream, src, nbytes);

        crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

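/*
 * CTR over whole blocks: 16-block batches go through the AVX routine,
 * leftovers are handled one block at a time by encrypting the current
 * counter value and XORing it into the data.  Returns the number of bytes
 * (less than one block) still to be finished by ctr_crypt_final().
 */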
static unsigned int __ctr_crypt(struct skcipher_walk *walk,
                                struct cast5_ctx *ctx)
{
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
                                        (__be64 *)walk->iv);

                        src += CAST5_PARALLEL_BLOCKS;
                        dst += CAST5_PARALLEL_BLOCKS;
                        nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                if (nbytes < bsize)
                        goto done;
        }

        /* Handle leftovers */
        do {
                u64 ctrblk;

                if (dst != src)
                        *dst = *src;

                ctrblk = *(u64 *)walk->iv;
                be64_add_cpu((__be64 *)walk->iv, 1);

                __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
                *dst ^= ctrblk;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

done:
        return nbytes;
}

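/*
 * Top-level CTR handler: whole blocks are processed under FPU protection by
 * __ctr_crypt(); a trailing partial block, if any, is finished by
 * ctr_crypt_final() without the FPU.
 */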
static int ctr_crypt(struct skcipher_request *req)
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
        bool fpu_enabled = false;
        struct skcipher_walk walk;
        unsigned int nbytes;
        int err;

        err = skcipher_walk_virt(&walk, req, false);

        while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
                nbytes = __ctr_crypt(&walk, ctx);
                err = skcipher_walk_done(&walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);

        if (walk.nbytes) {
                ctr_crypt_final(&walk, ctx);
                err = skcipher_walk_done(&walk, 0);
        }

        return err;
}

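/*
 * The algorithms below are marked CRYPTO_ALG_INTERNAL: they are only reached
 * through the simd_skcipher wrappers registered in cast5_init(), which defer
 * to an asynchronous cryptd path whenever SIMD is not usable in the calling
 * context.
 */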
static struct skcipher_alg cast5_algs[] = {
        {
                .base.cra_name          = "__ecb(cast5)",
                .base.cra_driver_name   = "__ecb-cast5-avx",
                .base.cra_priority      = 200,
                .base.cra_flags         = CRYPTO_ALG_INTERNAL,
                .base.cra_blocksize     = CAST5_BLOCK_SIZE,
                .base.cra_ctxsize       = sizeof(struct cast5_ctx),
                .base.cra_module        = THIS_MODULE,
                .min_keysize            = CAST5_MIN_KEY_SIZE,
                .max_keysize            = CAST5_MAX_KEY_SIZE,
                .setkey                 = cast5_setkey_skcipher,
                .encrypt                = ecb_encrypt,
                .decrypt                = ecb_decrypt,
        }, {
                .base.cra_name          = "__cbc(cast5)",
                .base.cra_driver_name   = "__cbc-cast5-avx",
                .base.cra_priority      = 200,
                .base.cra_flags         = CRYPTO_ALG_INTERNAL,
                .base.cra_blocksize     = CAST5_BLOCK_SIZE,
                .base.cra_ctxsize       = sizeof(struct cast5_ctx),
                .base.cra_module        = THIS_MODULE,
                .min_keysize            = CAST5_MIN_KEY_SIZE,
                .max_keysize            = CAST5_MAX_KEY_SIZE,
                .ivsize                 = CAST5_BLOCK_SIZE,
                .setkey                 = cast5_setkey_skcipher,
                .encrypt                = cbc_encrypt,
                .decrypt                = cbc_decrypt,
        }, {
                .base.cra_name          = "__ctr(cast5)",
                .base.cra_driver_name   = "__ctr-cast5-avx",
                .base.cra_priority      = 200,
                .base.cra_flags         = CRYPTO_ALG_INTERNAL,
                .base.cra_blocksize     = 1,
                .base.cra_ctxsize       = sizeof(struct cast5_ctx),
                .base.cra_module        = THIS_MODULE,
                .min_keysize            = CAST5_MIN_KEY_SIZE,
                .max_keysize            = CAST5_MAX_KEY_SIZE,
                .ivsize                 = CAST5_BLOCK_SIZE,
                .chunksize              = CAST5_BLOCK_SIZE,
                .setkey                 = cast5_setkey_skcipher,
                .encrypt                = ctr_crypt,
                .decrypt                = ctr_crypt,
        }
};

static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];

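/*
 * Module init: refuse to load (-ENODEV) unless the CPU and kernel support
 * the XSAVE state needed for AVX (SSE and YMM), then register the simd
 * wrappers for the internal algorithms above.
 */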
static int __init cast5_init(void)
{
        const char *feature_name;

        if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
                                &feature_name)) {
                pr_info("CPU feature '%s' is not supported.\n", feature_name);
                return -ENODEV;
        }

        return simd_register_skciphers_compat(cast5_algs,
                                              ARRAY_SIZE(cast5_algs),
                                              cast5_simd_algs);
}

static void __exit cast5_exit(void)
{
        simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
                                  cast5_simd_algs);
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");