GNU Linux-libre 4.9-gnu1
[releases.git] / drivers / crypto / ccp / ccp-dev-v5.c
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2016 Advanced Micro Devices, Inc.
5  *
6  * Author: Gary R Hook <gary.hook@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/kthread.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/interrupt.h>
19 #include <linux/compiler.h>
20 #include <linux/ccp.h>
21
22 #include "ccp-dev.h"
23
24 static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
25 {
26         struct ccp_device *ccp;
27         int start;
28
29         /* First look at the map for the queue */
30         if (cmd_q->lsb >= 0) {
31                 start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
32                                                         LSB_SIZE,
33                                                         0, count, 0);
34                 if (start < LSB_SIZE) {
35                         bitmap_set(cmd_q->lsbmap, start, count);
36                         return start + cmd_q->lsb * LSB_SIZE;
37                 }
38         }
39
40         /* No joy; try to get an entry from the shared blocks */
41         ccp = cmd_q->ccp;
42         for (;;) {
43                 mutex_lock(&ccp->sb_mutex);
44
45                 start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
46                                                         MAX_LSB_CNT * LSB_SIZE,
47                                                         0,
48                                                         count, 0);
49                 if (start <= MAX_LSB_CNT * LSB_SIZE) {
50                         bitmap_set(ccp->lsbmap, start, count);
51
52                         mutex_unlock(&ccp->sb_mutex);
53                         return start * LSB_ITEM_SIZE;
54                 }
55
56                 ccp->sb_avail = 0;
57
58                 mutex_unlock(&ccp->sb_mutex);
59
60                 /* Wait for KSB entries to become available */
61                 if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
62                         return 0;
63         }
64 }
65
66 static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
67                          unsigned int count)
68 {
69         int lsbno = start / LSB_SIZE;
70
71         if (!start)
72                 return;
73
74         if (cmd_q->lsb == lsbno) {
75                 /* An entry from the private LSB */
76                 bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
77         } else {
78                 /* From the shared LSBs */
79                 struct ccp_device *ccp = cmd_q->ccp;
80
81                 mutex_lock(&ccp->sb_mutex);
82                 bitmap_clear(ccp->lsbmap, start, count);
83                 ccp->sb_avail = 1;
84                 mutex_unlock(&ccp->sb_mutex);
85                 wake_up_interruptible_all(&ccp->sb_queue);
86         }
87 }
88
89 /* CCP version 5: Union to define the function field (cmd_reg1/dword0)
 *
 * Each struct below is an alternative bit-field view of the same 16-bit
 * value, which is also reachable as 'raw'.  The ccp5_perform_*() routines in
 * this file zero 'raw', fill in the view for their engine, and then copy
 * 'raw' into the descriptor through CCP5_CMD_FUNCTION().
 *
 * NOTE(review): the mapping of these fields onto hardware bits relies on the
 * compiler's bit-field allocation order - confirm for each supported ABI.
 */
90 union ccp_function {
91         struct {
92                 u16 size:7;     /* set to 0x7f by ccp5_perform_aes() for CFB mode */
93                 u16 encrypt:1;  /* loaded from op->u.aes.action */
94                 u16 mode:5;     /* loaded from op->u.aes.mode */
95                 u16 type:2;     /* loaded from op->u.aes.type */
96         } aes;
97         struct {
98                 u16 size:7;     /* loaded from op->u.xts.unit_size */
99                 u16 encrypt:1;  /* loaded from op->u.xts.action */
100                 u16 rsvd:5;
101                 u16 type:2;
102         } aes_xts;
103         struct {
104                 u16 rsvd1:10;
105                 u16 type:4;     /* loaded from op->u.sha.type */
106                 u16 rsvd2:1;
107         } sha;
108         struct {
109                 u16 mode:3;
110                 u16 size:12;    /* loaded from op->u.rsa.mod_size */
111         } rsa;
112         struct {
113                 u16 byteswap:2; /* loaded from op->u.passthru.byte_swap */
114                 u16 bitwise:3;  /* loaded from op->u.passthru.bit_mod */
115                 u16 reflect:2;
116                 u16 rsvd:8;
117         } pt;
118         struct  {
119                 u16 rsvd:13;
120         } zlib;
121         struct {
122                 u16 size:10;
123                 u16 type:2;
124                 u16 mode:3;     /* loaded from op->u.ecc.function */
125         } ecc;
126         u16 raw;                /* whole field; cleared before a view is filled */
127 };
128
/* Accessors for the per-engine views of union ccp_function.  Each takes a
 * pointer to the union and expands to the named bit-field, so it can be used
 * on either side of an assignment.
 */
129 #define CCP_AES_SIZE(p)         ((p)->aes.size)
130 #define CCP_AES_ENCRYPT(p)      ((p)->aes.encrypt)
131 #define CCP_AES_MODE(p)         ((p)->aes.mode)
132 #define CCP_AES_TYPE(p)         ((p)->aes.type)
133 #define CCP_XTS_SIZE(p)         ((p)->aes_xts.size)
134 #define CCP_XTS_ENCRYPT(p)      ((p)->aes_xts.encrypt)
135 #define CCP_SHA_TYPE(p)         ((p)->sha.type)
136 #define CCP_RSA_SIZE(p)         ((p)->rsa.size)
137 #define CCP_PT_BYTESWAP(p)      ((p)->pt.byteswap)
138 #define CCP_PT_BITWISE(p)       ((p)->pt.bitwise)
139 #define CCP_ECC_MODE(p)         ((p)->ecc.mode)
/* NOTE(review): the 'ecc' view of union ccp_function declares size, type and
 * mode but no member named 'one'; expanding this macro would not compile.
 */
140 #define CCP_ECC_AFFINE(p)       ((p)->ecc.one)
141
/* Accessors for the fields of a command descriptor; (p) is a pointer to the
 * descriptor (the ccp5_perform_*() routines pass a struct ccp5_desc *).
 */
142 /* Word 0 */
143 #define CCP5_CMD_DW0(p)         ((p)->dw0)
144 #define CCP5_CMD_SOC(p)         (CCP5_CMD_DW0(p).soc)
145 #define CCP5_CMD_IOC(p)         (CCP5_CMD_DW0(p).ioc)
146 #define CCP5_CMD_INIT(p)        (CCP5_CMD_DW0(p).init)
147 #define CCP5_CMD_EOM(p)         (CCP5_CMD_DW0(p).eom)
148 #define CCP5_CMD_FUNCTION(p)    (CCP5_CMD_DW0(p).function)
149 #define CCP5_CMD_ENGINE(p)      (CCP5_CMD_DW0(p).engine)
150 #define CCP5_CMD_PROT(p)        (CCP5_CMD_DW0(p).prot)
151
152 /* Word 1: length */
153 #define CCP5_CMD_DW1(p)         ((p)->length)
154 #define CCP5_CMD_LEN(p)         (CCP5_CMD_DW1(p))
155
156 /* Word 2: low part of the source address */
157 #define CCP5_CMD_DW2(p)         ((p)->src_lo)
158 #define CCP5_CMD_SRC_LO(p)      (CCP5_CMD_DW2(p))
159
160 /* Word 3: high part of the source address, source memory type, LSB id */
161 #define CCP5_CMD_DW3(p)         ((p)->dw3)
162 #define CCP5_CMD_SRC_MEM(p)     ((p)->dw3.src_mem)
163 #define CCP5_CMD_SRC_HI(p)      ((p)->dw3.src_hi)
164 #define CCP5_CMD_LSB_ID(p)      ((p)->dw3.lsb_cxt_id)
165 #define CCP5_CMD_FIX_SRC(p)     ((p)->dw3.fixed)
166
167 /* Words 4/5: destination address and memory type, or the SHA length */
168 #define CCP5_CMD_DW4(p)         ((p)->dw4)
169 #define CCP5_CMD_DST_LO(p)      (CCP5_CMD_DW4(p).dst_lo)
170 #define CCP5_CMD_DW5(p)         ((p)->dw5.fields.dst_hi)
171 #define CCP5_CMD_DST_HI(p)      (CCP5_CMD_DW5(p))
172 #define CCP5_CMD_DST_MEM(p)     ((p)->dw5.fields.dst_mem)
173 #define CCP5_CMD_FIX_DST(p)     ((p)->dw5.fields.fixed)
174 #define CCP5_CMD_SHA_LO(p)      ((p)->dw4.sha_len_lo)
175 #define CCP5_CMD_SHA_HI(p)      ((p)->dw5.sha_len_hi)
176
177 /* Word 6/7: key address and key memory type */
178 #define CCP5_CMD_DW6(p)         ((p)->key_lo)
179 #define CCP5_CMD_KEY_LO(p)      (CCP5_CMD_DW6(p))
180 #define CCP5_CMD_DW7(p)         ((p)->dw7)
181 #define CCP5_CMD_KEY_HI(p)      ((p)->dw7.key_hi)
182 #define CCP5_CMD_KEY_MEM(p)     ((p)->dw7.key_mem)
183
184 static inline u32 low_address(unsigned long addr)
185 {
186         return (u64)addr & 0x0ffffffff;
187 }
188
189 static inline u32 high_address(unsigned long addr)
190 {
191         return ((u64)addr >> 32) & 0x00000ffff;
192 }
193
194 static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
195 {
196         unsigned int head_idx, n;
197         u32 head_lo, queue_start;
198
199         queue_start = low_address(cmd_q->qdma_tail);
200         head_lo = ioread32(cmd_q->reg_head_lo);
201         head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc);
202
203         n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;
204
205         return n % COMMANDS_PER_QUEUE; /* Always one unused spot */
206 }
207
208 static int ccp5_do_cmd(struct ccp5_desc *desc,
209                        struct ccp_cmd_queue *cmd_q)
210 {
211         u32 *mP;
212         __le32 *dP;
213         u32 tail;
214         int     i;
215         int ret = 0;
216
217         if (CCP5_CMD_SOC(desc)) {
218                 CCP5_CMD_IOC(desc) = 1;
219                 CCP5_CMD_SOC(desc) = 0;
220         }
221         mutex_lock(&cmd_q->q_mutex);
222
223         mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
224         dP = (__le32 *) desc;
225         for (i = 0; i < 8; i++)
226                 mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
227
228         cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;
229
230         /* The data used by this command must be flushed to memory */
231         wmb();
232
233         /* Write the new tail address back to the queue register */
234         tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
235         iowrite32(tail, cmd_q->reg_tail_lo);
236
237         /* Turn the queue back on using our cached control register */
238         iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
239         mutex_unlock(&cmd_q->q_mutex);
240
241         if (CCP5_CMD_IOC(desc)) {
242                 /* Wait for the job to complete */
243                 ret = wait_event_interruptible(cmd_q->int_queue,
244                                                cmd_q->int_rcvd);
245                 if (ret || cmd_q->cmd_error) {
246                         if (cmd_q->cmd_error)
247                                 ccp_log_error(cmd_q->ccp,
248                                               cmd_q->cmd_error);
249                         /* A version 5 device doesn't use Job IDs... */
250                         if (!ret)
251                                 ret = -EIO;
252                 }
253                 cmd_q->int_rcvd = 0;
254         }
255
256         return 0;
257 }
258
259 static int ccp5_perform_aes(struct ccp_op *op)
260 {
261         struct ccp5_desc desc;
262         union ccp_function function;
263         u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
264
265         /* Zero out all the fields of the command desc */
266         memset(&desc, 0, Q_DESC_SIZE);
267
268         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
269
270         CCP5_CMD_SOC(&desc) = op->soc;
271         CCP5_CMD_IOC(&desc) = 1;
272         CCP5_CMD_INIT(&desc) = op->init;
273         CCP5_CMD_EOM(&desc) = op->eom;
274         CCP5_CMD_PROT(&desc) = 0;
275
276         function.raw = 0;
277         CCP_AES_ENCRYPT(&function) = op->u.aes.action;
278         CCP_AES_MODE(&function) = op->u.aes.mode;
279         CCP_AES_TYPE(&function) = op->u.aes.type;
280         if (op->u.aes.mode == CCP_AES_MODE_CFB)
281                 CCP_AES_SIZE(&function) = 0x7f;
282
283         CCP5_CMD_FUNCTION(&desc) = function.raw;
284
285         CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
286
287         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
288         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
289         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
290
291         CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
292         CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
293         CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
294
295         CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
296         CCP5_CMD_KEY_HI(&desc) = 0;
297         CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
298         CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
299
300         return ccp5_do_cmd(&desc, op->cmd_q);
301 }
302
303 static int ccp5_perform_xts_aes(struct ccp_op *op)
304 {
305         struct ccp5_desc desc;
306         union ccp_function function;
307         u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
308
309         /* Zero out all the fields of the command desc */
310         memset(&desc, 0, Q_DESC_SIZE);
311
312         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
313
314         CCP5_CMD_SOC(&desc) = op->soc;
315         CCP5_CMD_IOC(&desc) = 1;
316         CCP5_CMD_INIT(&desc) = op->init;
317         CCP5_CMD_EOM(&desc) = op->eom;
318         CCP5_CMD_PROT(&desc) = 0;
319
320         function.raw = 0;
321         CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
322         CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
323         CCP5_CMD_FUNCTION(&desc) = function.raw;
324
325         CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
326
327         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
328         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
329         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
330
331         CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
332         CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
333         CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
334
335         CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
336         CCP5_CMD_KEY_HI(&desc) =  0;
337         CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
338         CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
339
340         return ccp5_do_cmd(&desc, op->cmd_q);
341 }
342
343 static int ccp5_perform_sha(struct ccp_op *op)
344 {
345         struct ccp5_desc desc;
346         union ccp_function function;
347
348         /* Zero out all the fields of the command desc */
349         memset(&desc, 0, Q_DESC_SIZE);
350
351         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
352
353         CCP5_CMD_SOC(&desc) = op->soc;
354         CCP5_CMD_IOC(&desc) = 1;
355         CCP5_CMD_INIT(&desc) = 1;
356         CCP5_CMD_EOM(&desc) = op->eom;
357         CCP5_CMD_PROT(&desc) = 0;
358
359         function.raw = 0;
360         CCP_SHA_TYPE(&function) = op->u.sha.type;
361         CCP5_CMD_FUNCTION(&desc) = function.raw;
362
363         CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
364
365         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
366         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
367         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
368
369         CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
370
371         if (op->eom) {
372                 CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
373                 CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
374         } else {
375                 CCP5_CMD_SHA_LO(&desc) = 0;
376                 CCP5_CMD_SHA_HI(&desc) = 0;
377         }
378
379         return ccp5_do_cmd(&desc, op->cmd_q);
380 }
381
382 static int ccp5_perform_rsa(struct ccp_op *op)
383 {
384         struct ccp5_desc desc;
385         union ccp_function function;
386
387         /* Zero out all the fields of the command desc */
388         memset(&desc, 0, Q_DESC_SIZE);
389
390         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
391
392         CCP5_CMD_SOC(&desc) = op->soc;
393         CCP5_CMD_IOC(&desc) = 1;
394         CCP5_CMD_INIT(&desc) = 0;
395         CCP5_CMD_EOM(&desc) = 1;
396         CCP5_CMD_PROT(&desc) = 0;
397
398         function.raw = 0;
399         CCP_RSA_SIZE(&function) = op->u.rsa.mod_size;
400         CCP5_CMD_FUNCTION(&desc) = function.raw;
401
402         CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
403
404         /* Source is from external memory */
405         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
406         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
407         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
408
409         /* Destination is in external memory */
410         CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
411         CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
412         CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
413
414         /* Key (Exponent) is in external memory */
415         CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);
416         CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
417         CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
418
419         return ccp5_do_cmd(&desc, op->cmd_q);
420 }
421
422 static int ccp5_perform_passthru(struct ccp_op *op)
423 {
424         struct ccp5_desc desc;
425         union ccp_function function;
426         struct ccp_dma_info *saddr = &op->src.u.dma;
427         struct ccp_dma_info *daddr = &op->dst.u.dma;
428
429         memset(&desc, 0, Q_DESC_SIZE);
430
431         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
432
433         CCP5_CMD_SOC(&desc) = 0;
434         CCP5_CMD_IOC(&desc) = 1;
435         CCP5_CMD_INIT(&desc) = 0;
436         CCP5_CMD_EOM(&desc) = op->eom;
437         CCP5_CMD_PROT(&desc) = 0;
438
439         function.raw = 0;
440         CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
441         CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
442         CCP5_CMD_FUNCTION(&desc) = function.raw;
443
444         /* Length of source data is always 256 bytes */
445         if (op->src.type == CCP_MEMTYPE_SYSTEM)
446                 CCP5_CMD_LEN(&desc) = saddr->length;
447         else
448                 CCP5_CMD_LEN(&desc) = daddr->length;
449
450         if (op->src.type == CCP_MEMTYPE_SYSTEM) {
451                 CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
452                 CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
453                 CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
454
455                 if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
456                         CCP5_CMD_LSB_ID(&desc) = op->sb_key;
457         } else {
458                 u32 key_addr = op->src.u.sb * CCP_SB_BYTES;
459
460                 CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
461                 CCP5_CMD_SRC_HI(&desc) = 0;
462                 CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
463         }
464
465         if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
466                 CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
467                 CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
468                 CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
469         } else {
470                 u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;
471
472                 CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
473                 CCP5_CMD_DST_HI(&desc) = 0;
474                 CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
475         }
476
477         return ccp5_do_cmd(&desc, op->cmd_q);
478 }
479
480 static int ccp5_perform_ecc(struct ccp_op *op)
481 {
482         struct ccp5_desc desc;
483         union ccp_function function;
484
485         /* Zero out all the fields of the command desc */
486         memset(&desc, 0, Q_DESC_SIZE);
487
488         CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
489
490         CCP5_CMD_SOC(&desc) = 0;
491         CCP5_CMD_IOC(&desc) = 1;
492         CCP5_CMD_INIT(&desc) = 0;
493         CCP5_CMD_EOM(&desc) = 1;
494         CCP5_CMD_PROT(&desc) = 0;
495
496         function.raw = 0;
497         function.ecc.mode = op->u.ecc.function;
498         CCP5_CMD_FUNCTION(&desc) = function.raw;
499
500         CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
501
502         CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
503         CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
504         CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
505
506         CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
507         CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
508         CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
509
510         return ccp5_do_cmd(&desc, op->cmd_q);
511 }
512
513 static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
514 {
515         int q_mask = 1 << cmd_q->id;
516         int queues = 0;
517         int j;
518
519         /* Build a bit mask to know which LSBs this queue has access to.
520          * Don't bother with segment 0 as it has special privileges.
521          */
522         for (j = 1; j < MAX_LSB_CNT; j++) {
523                 if (status & q_mask)
524                         bitmap_set(cmd_q->lsbmask, j, 1);
525                 status >>= LSB_REGION_WIDTH;
526         }
527         queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
528         dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
529                  cmd_q->id, queues);
530
531         return queues ? 0 : -EINVAL;
532 }
533
534
535 static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
536                                         int lsb_cnt, int n_lsbs,
537                                         unsigned long *lsb_pub)
538 {
539         DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
540         int bitno;
541         int qlsb_wgt;
542         int i;
543
544         /* For each queue:
545          * If the count of potential LSBs available to a queue matches the
546          * ordinal given to us in lsb_cnt:
547          * Copy the mask of possible LSBs for this queue into "qlsb";
548          * For each bit in qlsb, see if the corresponding bit in the
549          * aggregation mask is set; if so, we have a match.
550          *     If we have a match, clear the bit in the aggregation to
551          *     mark it as no longer available.
552          *     If there is no match, clear the bit in qlsb and keep looking.
553          */
554         for (i = 0; i < ccp->cmd_q_count; i++) {
555                 struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
556
557                 qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
558
559                 if (qlsb_wgt == lsb_cnt) {
560                         bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);
561
562                         bitno = find_first_bit(qlsb, MAX_LSB_CNT);
563                         while (bitno < MAX_LSB_CNT) {
564                                 if (test_bit(bitno, lsb_pub)) {
565                                         /* We found an available LSB
566                                          * that this queue can access
567                                          */
568                                         cmd_q->lsb = bitno;
569                                         bitmap_clear(lsb_pub, bitno, 1);
570                                         dev_info(ccp->dev,
571                                                  "Queue %d gets LSB %d\n",
572                                                  i, bitno);
573                                         break;
574                                 }
575                                 bitmap_clear(qlsb, bitno, 1);
576                                 bitno = find_first_bit(qlsb, MAX_LSB_CNT);
577                         }
578                         if (bitno >= MAX_LSB_CNT)
579                                 return -EINVAL;
580                         n_lsbs--;
581                 }
582         }
583         return n_lsbs;
584 }
585
586 /* For each queue, from the most- to least-constrained:
587  * find an LSB that can be assigned to the queue. If there are N queues that
588  * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
589  * dedicated LSB. Remaining LSB regions become a shared resource.
590  * If we have fewer LSBs than queues, all LSB regions become shared resources.
591  */
592 static int ccp_assign_lsbs(struct ccp_device *ccp)
593 {
594         DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
595         DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
596         int n_lsbs = 0;
597         int bitno;
598         int i, lsb_cnt;
599         int rc = 0;
600
601         bitmap_zero(lsb_pub, MAX_LSB_CNT);
602
603         /* Create an aggregate bitmap to get a total count of available LSBs */
604         for (i = 0; i < ccp->cmd_q_count; i++)
605                 bitmap_or(lsb_pub,
606                           lsb_pub, ccp->cmd_q[i].lsbmask,
607                           MAX_LSB_CNT);
608
609         n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);
610
611         if (n_lsbs >= ccp->cmd_q_count) {
612                 /* We have enough LSBS to give every queue a private LSB.
613                  * Brute force search to start with the queues that are more
614                  * constrained in LSB choice. When an LSB is privately
615                  * assigned, it is removed from the public mask.
616                  * This is an ugly N squared algorithm with some optimization.
617                  */
618                 for (lsb_cnt = 1;
619                      n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
620                      lsb_cnt++) {
621                         rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
622                                                           lsb_pub);
623                         if (rc < 0)
624                                 return -EINVAL;
625                         n_lsbs = rc;
626                 }
627         }
628
629         rc = 0;
630         /* What's left of the LSBs, according to the public mask, now become
631          * shared. Any zero bits in the lsb_pub mask represent an LSB region
632          * that can't be used as a shared resource, so mark the LSB slots for
633          * them as "in use".
634          */
635         bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
636
637         bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
638         while (bitno < MAX_LSB_CNT) {
639                 bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
640                 bitmap_set(qlsb, bitno, 1);
641                 bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
642         }
643
644         return rc;
645 }
646
647 static int ccp5_init(struct ccp_device *ccp)
648 {
649         struct device *dev = ccp->dev;
650         struct ccp_cmd_queue *cmd_q;
651         struct dma_pool *dma_pool;
652         char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
653         unsigned int qmr, qim, i;
654         u64 status;
655         u32 status_lo, status_hi;
656         int ret;
657
658         /* Find available queues */
659         qim = 0;
660         qmr = ioread32(ccp->io_regs + Q_MASK_REG);
661         for (i = 0; i < MAX_HW_QUEUES; i++) {
662
663                 if (!(qmr & (1 << i)))
664                         continue;
665
666                 /* Allocate a dma pool for this queue */
667                 snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
668                          ccp->name, i);
669                 dma_pool = dma_pool_create(dma_pool_name, dev,
670                                            CCP_DMAPOOL_MAX_SIZE,
671                                            CCP_DMAPOOL_ALIGN, 0);
672                 if (!dma_pool) {
673                         dev_err(dev, "unable to allocate dma pool\n");
674                         ret = -ENOMEM;
675                 }
676
677                 cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
678                 ccp->cmd_q_count++;
679
680                 cmd_q->ccp = ccp;
681                 cmd_q->id = i;
682                 cmd_q->dma_pool = dma_pool;
683                 mutex_init(&cmd_q->q_mutex);
684
685                 /* Page alignment satisfies our needs for N <= 128 */
686                 BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
687                 cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
688                 cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
689                                                    &cmd_q->qbase_dma,
690                                                    GFP_KERNEL);
691                 if (!cmd_q->qbase) {
692                         dev_err(dev, "unable to allocate command queue\n");
693                         ret = -ENOMEM;
694                         goto e_pool;
695                 }
696
697                 cmd_q->qidx = 0;
698                 /* Preset some register values and masks that are queue
699                  * number dependent
700                  */
701                 cmd_q->reg_control = ccp->io_regs +
702                                      CMD5_Q_STATUS_INCR * (i + 1);
703                 cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
704                 cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
705                 cmd_q->reg_int_enable = cmd_q->reg_control +
706                                         CMD5_Q_INT_ENABLE_BASE;
707                 cmd_q->reg_interrupt_status = cmd_q->reg_control +
708                                               CMD5_Q_INTERRUPT_STATUS_BASE;
709                 cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
710                 cmd_q->reg_int_status = cmd_q->reg_control +
711                                         CMD5_Q_INT_STATUS_BASE;
712                 cmd_q->reg_dma_status = cmd_q->reg_control +
713                                         CMD5_Q_DMA_STATUS_BASE;
714                 cmd_q->reg_dma_read_status = cmd_q->reg_control +
715                                              CMD5_Q_DMA_READ_STATUS_BASE;
716                 cmd_q->reg_dma_write_status = cmd_q->reg_control +
717                                               CMD5_Q_DMA_WRITE_STATUS_BASE;
718
719                 init_waitqueue_head(&cmd_q->int_queue);
720
721                 dev_dbg(dev, "queue #%u available\n", i);
722         }
723         if (ccp->cmd_q_count == 0) {
724                 dev_notice(dev, "no command queues available\n");
725                 ret = -EIO;
726                 goto e_pool;
727         }
728         dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
729
730         /* Turn off the queues and disable interrupts until ready */
731         for (i = 0; i < ccp->cmd_q_count; i++) {
732                 cmd_q = &ccp->cmd_q[i];
733
734                 cmd_q->qcontrol = 0; /* Start with nothing */
735                 iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
736
737                 /* Disable the interrupts */
738                 iowrite32(0x00, cmd_q->reg_int_enable);
739                 ioread32(cmd_q->reg_int_status);
740                 ioread32(cmd_q->reg_status);
741
742                 /* Clear the interrupts */
743                 iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
744         }
745
746         dev_dbg(dev, "Requesting an IRQ...\n");
747         /* Request an irq */
748         ret = ccp->get_irq(ccp);
749         if (ret) {
750                 dev_err(dev, "unable to allocate an IRQ\n");
751                 goto e_pool;
752         }
753
754         /* Initialize the queue used to suspend */
755         init_waitqueue_head(&ccp->suspend_queue);
756
757         dev_dbg(dev, "Loading LSB map...\n");
758         /* Copy the private LSB mask to the public registers */
759         status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
760         status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
761         iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
762         iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
763         status = ((u64)status_hi<<30) | (u64)status_lo;
764
765         dev_dbg(dev, "Configuring virtual queues...\n");
766         /* Configure size of each virtual queue accessible to host */
767         for (i = 0; i < ccp->cmd_q_count; i++) {
768                 u32 dma_addr_lo;
769                 u32 dma_addr_hi;
770
771                 cmd_q = &ccp->cmd_q[i];
772
773                 cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
774                 cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;
775
776                 cmd_q->qdma_tail = cmd_q->qbase_dma;
777                 dma_addr_lo = low_address(cmd_q->qdma_tail);
778                 iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
779                 iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);
780
781                 dma_addr_hi = high_address(cmd_q->qdma_tail);
782                 cmd_q->qcontrol |= (dma_addr_hi << 16);
783                 iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
784
785                 /* Find the LSB regions accessible to the queue */
786                 ccp_find_lsb_regions(cmd_q, status);
787                 cmd_q->lsb = -1; /* Unassigned value */
788         }
789
790         dev_dbg(dev, "Assigning LSBs...\n");
791         ret = ccp_assign_lsbs(ccp);
792         if (ret) {
793                 dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
794                 goto e_irq;
795         }
796
797         /* Optimization: pre-allocate LSB slots for each queue */
798         for (i = 0; i < ccp->cmd_q_count; i++) {
799                 ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
800                 ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
801         }
802
803         dev_dbg(dev, "Starting threads...\n");
804         /* Create a kthread for each queue */
805         for (i = 0; i < ccp->cmd_q_count; i++) {
806                 struct task_struct *kthread;
807
808                 cmd_q = &ccp->cmd_q[i];
809
810                 kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
811                                          "%s-q%u", ccp->name, cmd_q->id);
812                 if (IS_ERR(kthread)) {
813                         dev_err(dev, "error creating queue thread (%ld)\n",
814                                 PTR_ERR(kthread));
815                         ret = PTR_ERR(kthread);
816                         goto e_kthread;
817                 }
818
819                 cmd_q->kthread = kthread;
820                 wake_up_process(kthread);
821         }
822
823         dev_dbg(dev, "Enabling interrupts...\n");
824         /* Enable interrupts */
825         for (i = 0; i < ccp->cmd_q_count; i++) {
826                 cmd_q = &ccp->cmd_q[i];
827                 iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
828         }
829
830         dev_dbg(dev, "Registering device...\n");
831         /* Put this on the unit list to make it available */
832         ccp_add_device(ccp);
833
834         ret = ccp_register_rng(ccp);
835         if (ret)
836                 goto e_kthread;
837
838         /* Register the DMA engine support */
839         ret = ccp_dmaengine_register(ccp);
840         if (ret)
841                 goto e_hwrng;
842
843         return 0;
844
845 e_hwrng:
846         ccp_unregister_rng(ccp);
847
848 e_kthread:
849         for (i = 0; i < ccp->cmd_q_count; i++)
850                 if (ccp->cmd_q[i].kthread)
851                         kthread_stop(ccp->cmd_q[i].kthread);
852
853 e_irq:
854         ccp->free_irq(ccp);
855
856 e_pool:
857         for (i = 0; i < ccp->cmd_q_count; i++)
858                 dma_pool_destroy(ccp->cmd_q[i].dma_pool);
859
860         return ret;
861 }
862
863 static void ccp5_destroy(struct ccp_device *ccp)
864 {
865         struct device *dev = ccp->dev;
866         struct ccp_cmd_queue *cmd_q;
867         struct ccp_cmd *cmd;
868         unsigned int i;
869
870         /* Unregister the DMA engine */
871         ccp_dmaengine_unregister(ccp);
872
873         /* Unregister the RNG */
874         ccp_unregister_rng(ccp);
875
876         /* Remove this device from the list of available units first */
877         ccp_del_device(ccp);
878
879         /* Disable and clear interrupts */
880         for (i = 0; i < ccp->cmd_q_count; i++) {
881                 cmd_q = &ccp->cmd_q[i];
882
883                 /* Turn off the run bit */
884                 iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
885
886                 /* Disable the interrupts */
887                 iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
888
889                 /* Clear the interrupt status */
890                 iowrite32(0x00, cmd_q->reg_int_enable);
891                 ioread32(cmd_q->reg_int_status);
892                 ioread32(cmd_q->reg_status);
893         }
894
895         /* Stop the queue kthreads */
896         for (i = 0; i < ccp->cmd_q_count; i++)
897                 if (ccp->cmd_q[i].kthread)
898                         kthread_stop(ccp->cmd_q[i].kthread);
899
900         ccp->free_irq(ccp);
901
902         for (i = 0; i < ccp->cmd_q_count; i++) {
903                 cmd_q = &ccp->cmd_q[i];
904                 dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
905                                   cmd_q->qbase_dma);
906         }
907
908         /* Flush the cmd and backlog queue */
909         while (!list_empty(&ccp->cmd)) {
910                 /* Invoke the callback directly with an error code */
911                 cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
912                 list_del(&cmd->entry);
913                 cmd->callback(cmd->data, -ENODEV);
914         }
915         while (!list_empty(&ccp->backlog)) {
916                 /* Invoke the callback directly with an error code */
917                 cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
918                 list_del(&cmd->entry);
919                 cmd->callback(cmd->data, -ENODEV);
920         }
921 }
922
923 static irqreturn_t ccp5_irq_handler(int irq, void *data)
924 {
925         struct device *dev = data;
926         struct ccp_device *ccp = dev_get_drvdata(dev);
927         u32 status;
928         unsigned int i;
929
930         for (i = 0; i < ccp->cmd_q_count; i++) {
931                 struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
932
933                 status = ioread32(cmd_q->reg_interrupt_status);
934
935                 if (status) {
936                         cmd_q->int_status = status;
937                         cmd_q->q_status = ioread32(cmd_q->reg_status);
938                         cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
939
940                         /* On error, only save the first error value */
941                         if ((status & INT_ERROR) && !cmd_q->cmd_error)
942                                 cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
943
944                         cmd_q->int_rcvd = 1;
945
946                         /* Acknowledge the interrupt and wake the kthread */
947                         iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
948                         wake_up_interruptible(&cmd_q->int_queue);
949                 }
950         }
951
952         return IRQ_HANDLED;
953 }
954
955 static void ccp5_config(struct ccp_device *ccp)
956 {
957         /* Public side */
958         iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
959 }
960
961 static void ccp5other_config(struct ccp_device *ccp)
962 {
963         int i;
964         u32 rnd;
965
966         /* We own all of the queues on the NTB CCP */
967
968         iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
969         iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
970         for (i = 0; i < 12; i++) {
971                 rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
972                 iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
973         }
974
975         iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
976         iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
977         iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);
978
979         iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
980         iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
981
982         iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);
983
984         ccp5_config(ccp);
985 }
986
987 /* Version 5 adds some function, but is essentially the same as v5 */
988 static const struct ccp_actions ccp5_actions = {
989         .aes = ccp5_perform_aes,
990         .xts_aes = ccp5_perform_xts_aes,
991         .sha = ccp5_perform_sha,
992         .rsa = ccp5_perform_rsa,
993         .passthru = ccp5_perform_passthru,
994         .ecc = ccp5_perform_ecc,
995         .sballoc = ccp_lsb_alloc,
996         .sbfree = ccp_lsb_free,
997         .init = ccp5_init,
998         .destroy = ccp5_destroy,
999         .get_free_slots = ccp5_get_free_slots,
1000         .irqhandler = ccp5_irq_handler,
1001 };
1002
1003 const struct ccp_vdata ccpv5a = {
1004         .version = CCP_VERSION(5, 0),
1005         .setup = ccp5_config,
1006         .perform = &ccp5_actions,
1007         .bar = 2,
1008         .offset = 0x0,
1009 };
1010
1011 const struct ccp_vdata ccpv5b = {
1012         .version = CCP_VERSION(5, 0),
1013         .setup = ccp5other_config,
1014         .perform = &ccp5_actions,
1015         .bar = 2,
1016         .offset = 0x0,
1017 };