GNU Linux-libre 4.14.290-gnu1
[releases.git] / arch / arm64 / crypto / sha512-core.S_shipped
1 // Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
2 //
3 // Licensed under the OpenSSL license (the "License").  You may not use
4 // this file except in compliance with the License.  You can obtain a copy
5 // in the file LICENSE in the source distribution or at
6 // https://www.openssl.org/source/license.html
7
8 // ====================================================================
9 // Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
10 // project. The module is, however, dual licensed under OpenSSL and
11 // CRYPTOGAMS licenses depending on where you obtain it. For further
12 // details see http://www.openssl.org/~appro/cryptogams/.
13 //
14 // Permission to use under GPLv2 terms is granted.
15 // ====================================================================
16 //
17 // SHA256/512 for ARMv8.
18 //
19 // Performance in cycles per processed byte and improvement coefficient
20 // over code generated with "default" compiler:
21 //
22 //              SHA256-hw       SHA256(*)       SHA512
23 // Apple A7     1.97            10.5 (+33%)     6.73 (-1%(**))
24 // Cortex-A53   2.38            15.5 (+115%)    10.0 (+150%(***))
25 // Cortex-A57   2.31            11.6 (+86%)     7.51 (+260%(***))
26 // Denver       2.01            10.5 (+26%)     6.70 (+8%)
27 // X-Gene                       20.0 (+100%)    12.8 (+300%(***))
28 // Mongoose     2.36            13.0 (+50%)     8.36 (+33%)
29 //
30 // (*)  Software SHA256 results are of lesser relevance, presented
31 //      mostly for informational purposes.
32 // (**) The result is a trade-off: it's possible to improve it by
33 //      10% (or by 1 cycle per round), but at the cost of 20% loss
34 //      on Cortex-A53 (or by 4 cycles per round).
35 // (***)        Super-impressive coefficients over gcc-generated code are
36 //      an indication of some compiler "pathology", most notably code
37 //      generated with -mgeneral-regs-only is significantly faster
38 //      and the gap is only 40-90%.
39 //
40 // October 2016.
41 //
42 // Originally it was reckoned that it makes no sense to implement NEON
43 // version of SHA256 for 64-bit processors. This is because performance
44 // improvement on most wide-spread Cortex-A5x processors was observed
45 // to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
46 // observed that 32-bit NEON SHA256 performs significantly better than
47 // 64-bit scalar version on *some* of the more recent processors. As a
48 // result, 64-bit NEON version of SHA256 was added to provide best
49 // all-round performance. For example it executes ~30% faster on X-Gene
50 // and Mongoose. [For reference, NEON version of SHA512 is bound to
51 // deliver much less improvement, likely *negative* on Cortex-A5x.
52 // Which is why NEON support is limited to SHA256.]
53
54 #ifndef __KERNEL__
55 # include "arm_arch.h"
56 #endif
57
58 .text
59
60 .extern OPENSSL_armcap_P
61 .globl  sha512_block_data_order
62 .type   sha512_block_data_order,%function
63 .align  6
64 sha512_block_data_order:
65         stp     x29,x30,[sp,#-128]!
66         add     x29,sp,#0
67
68         stp     x19,x20,[sp,#16]
69         stp     x21,x22,[sp,#32]
70         stp     x23,x24,[sp,#48]
71         stp     x25,x26,[sp,#64]
72         stp     x27,x28,[sp,#80]
73         sub     sp,sp,#4*8
74
75         ldp     x20,x21,[x0]                            // load context
76         ldp     x22,x23,[x0,#2*8]
77         ldp     x24,x25,[x0,#4*8]
78         add     x2,x1,x2,lsl#7  // end of input
79         ldp     x26,x27,[x0,#6*8]
80         adr     x30,.LK512
81         stp     x0,x2,[x29,#96]
82
83 .Loop:
84         ldp     x3,x4,[x1],#2*8
85         ldr     x19,[x30],#8                    // *K++
86         eor     x28,x21,x22                             // magic seed
87         str     x1,[x29,#112]
88 #ifndef __AARCH64EB__
89         rev     x3,x3                   // 0
90 #endif
91         ror     x16,x24,#14
92         add     x27,x27,x19                     // h+=K[i]
93         eor     x6,x24,x24,ror#23
94         and     x17,x25,x24
95         bic     x19,x26,x24
96         add     x27,x27,x3                      // h+=X[i]
97         orr     x17,x17,x19                     // Ch(e,f,g)
98         eor     x19,x20,x21                     // a^b, b^c in next round
99         eor     x16,x16,x6,ror#18       // Sigma1(e)
100         ror     x6,x20,#28
101         add     x27,x27,x17                     // h+=Ch(e,f,g)
102         eor     x17,x20,x20,ror#5
103         add     x27,x27,x16                     // h+=Sigma1(e)
104         and     x28,x28,x19                     // (b^c)&=(a^b)
105         add     x23,x23,x27                     // d+=h
106         eor     x28,x28,x21                     // Maj(a,b,c)
107         eor     x17,x6,x17,ror#34       // Sigma0(a)
108         add     x27,x27,x28                     // h+=Maj(a,b,c)
109         ldr     x28,[x30],#8            // *K++, x19 in next round
110         //add   x27,x27,x17                     // h+=Sigma0(a)
111 #ifndef __AARCH64EB__
112         rev     x4,x4                   // 1
113 #endif
114         ldp     x5,x6,[x1],#2*8
115         add     x27,x27,x17                     // h+=Sigma0(a)
116         ror     x16,x23,#14
117         add     x26,x26,x28                     // h+=K[i]
118         eor     x7,x23,x23,ror#23
119         and     x17,x24,x23
120         bic     x28,x25,x23
121         add     x26,x26,x4                      // h+=X[i]
122         orr     x17,x17,x28                     // Ch(e,f,g)
123         eor     x28,x27,x20                     // a^b, b^c in next round
124         eor     x16,x16,x7,ror#18       // Sigma1(e)
125         ror     x7,x27,#28
126         add     x26,x26,x17                     // h+=Ch(e,f,g)
127         eor     x17,x27,x27,ror#5
128         add     x26,x26,x16                     // h+=Sigma1(e)
129         and     x19,x19,x28                     // (b^c)&=(a^b)
130         add     x22,x22,x26                     // d+=h
131         eor     x19,x19,x20                     // Maj(a,b,c)
132         eor     x17,x7,x17,ror#34       // Sigma0(a)
133         add     x26,x26,x19                     // h+=Maj(a,b,c)
134         ldr     x19,[x30],#8            // *K++, x28 in next round
135         //add   x26,x26,x17                     // h+=Sigma0(a)
136 #ifndef __AARCH64EB__
137         rev     x5,x5                   // 2
138 #endif
139         add     x26,x26,x17                     // h+=Sigma0(a)
140         ror     x16,x22,#14
141         add     x25,x25,x19                     // h+=K[i]
142         eor     x8,x22,x22,ror#23
143         and     x17,x23,x22
144         bic     x19,x24,x22
145         add     x25,x25,x5                      // h+=X[i]
146         orr     x17,x17,x19                     // Ch(e,f,g)
147         eor     x19,x26,x27                     // a^b, b^c in next round
148         eor     x16,x16,x8,ror#18       // Sigma1(e)
149         ror     x8,x26,#28
150         add     x25,x25,x17                     // h+=Ch(e,f,g)
151         eor     x17,x26,x26,ror#5
152         add     x25,x25,x16                     // h+=Sigma1(e)
153         and     x28,x28,x19                     // (b^c)&=(a^b)
154         add     x21,x21,x25                     // d+=h
155         eor     x28,x28,x27                     // Maj(a,b,c)
156         eor     x17,x8,x17,ror#34       // Sigma0(a)
157         add     x25,x25,x28                     // h+=Maj(a,b,c)
158         ldr     x28,[x30],#8            // *K++, x19 in next round
159         //add   x25,x25,x17                     // h+=Sigma0(a)
160 #ifndef __AARCH64EB__
161         rev     x6,x6                   // 3
162 #endif
163         ldp     x7,x8,[x1],#2*8
164         add     x25,x25,x17                     // h+=Sigma0(a)
165         ror     x16,x21,#14
166         add     x24,x24,x28                     // h+=K[i]
167         eor     x9,x21,x21,ror#23
168         and     x17,x22,x21
169         bic     x28,x23,x21
170         add     x24,x24,x6                      // h+=X[i]
171         orr     x17,x17,x28                     // Ch(e,f,g)
172         eor     x28,x25,x26                     // a^b, b^c in next round
173         eor     x16,x16,x9,ror#18       // Sigma1(e)
174         ror     x9,x25,#28
175         add     x24,x24,x17                     // h+=Ch(e,f,g)
176         eor     x17,x25,x25,ror#5
177         add     x24,x24,x16                     // h+=Sigma1(e)
178         and     x19,x19,x28                     // (b^c)&=(a^b)
179         add     x20,x20,x24                     // d+=h
180         eor     x19,x19,x26                     // Maj(a,b,c)
181         eor     x17,x9,x17,ror#34       // Sigma0(a)
182         add     x24,x24,x19                     // h+=Maj(a,b,c)
183         ldr     x19,[x30],#8            // *K++, x28 in next round
184         //add   x24,x24,x17                     // h+=Sigma0(a)
185 #ifndef __AARCH64EB__
186         rev     x7,x7                   // 4
187 #endif
188         add     x24,x24,x17                     // h+=Sigma0(a)
189         ror     x16,x20,#14
190         add     x23,x23,x19                     // h+=K[i]
191         eor     x10,x20,x20,ror#23
192         and     x17,x21,x20
193         bic     x19,x22,x20
194         add     x23,x23,x7                      // h+=X[i]
195         orr     x17,x17,x19                     // Ch(e,f,g)
196         eor     x19,x24,x25                     // a^b, b^c in next round
197         eor     x16,x16,x10,ror#18      // Sigma1(e)
198         ror     x10,x24,#28
199         add     x23,x23,x17                     // h+=Ch(e,f,g)
200         eor     x17,x24,x24,ror#5
201         add     x23,x23,x16                     // h+=Sigma1(e)
202         and     x28,x28,x19                     // (b^c)&=(a^b)
203         add     x27,x27,x23                     // d+=h
204         eor     x28,x28,x25                     // Maj(a,b,c)
205         eor     x17,x10,x17,ror#34      // Sigma0(a)
206         add     x23,x23,x28                     // h+=Maj(a,b,c)
207         ldr     x28,[x30],#8            // *K++, x19 in next round
208         //add   x23,x23,x17                     // h+=Sigma0(a)
209 #ifndef __AARCH64EB__
210         rev     x8,x8                   // 5
211 #endif
212         ldp     x9,x10,[x1],#2*8
213         add     x23,x23,x17                     // h+=Sigma0(a)
214         ror     x16,x27,#14
215         add     x22,x22,x28                     // h+=K[i]
216         eor     x11,x27,x27,ror#23
217         and     x17,x20,x27
218         bic     x28,x21,x27
219         add     x22,x22,x8                      // h+=X[i]
220         orr     x17,x17,x28                     // Ch(e,f,g)
221         eor     x28,x23,x24                     // a^b, b^c in next round
222         eor     x16,x16,x11,ror#18      // Sigma1(e)
223         ror     x11,x23,#28
224         add     x22,x22,x17                     // h+=Ch(e,f,g)
225         eor     x17,x23,x23,ror#5
226         add     x22,x22,x16                     // h+=Sigma1(e)
227         and     x19,x19,x28                     // (b^c)&=(a^b)
228         add     x26,x26,x22                     // d+=h
229         eor     x19,x19,x24                     // Maj(a,b,c)
230         eor     x17,x11,x17,ror#34      // Sigma0(a)
231         add     x22,x22,x19                     // h+=Maj(a,b,c)
232         ldr     x19,[x30],#8            // *K++, x28 in next round
233         //add   x22,x22,x17                     // h+=Sigma0(a)
234 #ifndef __AARCH64EB__
235         rev     x9,x9                   // 6
236 #endif
237         add     x22,x22,x17                     // h+=Sigma0(a)
238         ror     x16,x26,#14
239         add     x21,x21,x19                     // h+=K[i]
240         eor     x12,x26,x26,ror#23
241         and     x17,x27,x26
242         bic     x19,x20,x26
243         add     x21,x21,x9                      // h+=X[i]
244         orr     x17,x17,x19                     // Ch(e,f,g)
245         eor     x19,x22,x23                     // a^b, b^c in next round
246         eor     x16,x16,x12,ror#18      // Sigma1(e)
247         ror     x12,x22,#28
248         add     x21,x21,x17                     // h+=Ch(e,f,g)
249         eor     x17,x22,x22,ror#5
250         add     x21,x21,x16                     // h+=Sigma1(e)
251         and     x28,x28,x19                     // (b^c)&=(a^b)
252         add     x25,x25,x21                     // d+=h
253         eor     x28,x28,x23                     // Maj(a,b,c)
254         eor     x17,x12,x17,ror#34      // Sigma0(a)
255         add     x21,x21,x28                     // h+=Maj(a,b,c)
256         ldr     x28,[x30],#8            // *K++, x19 in next round
257         //add   x21,x21,x17                     // h+=Sigma0(a)
258 #ifndef __AARCH64EB__
259         rev     x10,x10                 // 7
260 #endif
261         ldp     x11,x12,[x1],#2*8
262         add     x21,x21,x17                     // h+=Sigma0(a)
263         ror     x16,x25,#14
264         add     x20,x20,x28                     // h+=K[i]
265         eor     x13,x25,x25,ror#23
266         and     x17,x26,x25
267         bic     x28,x27,x25
268         add     x20,x20,x10                     // h+=X[i]
269         orr     x17,x17,x28                     // Ch(e,f,g)
270         eor     x28,x21,x22                     // a^b, b^c in next round
271         eor     x16,x16,x13,ror#18      // Sigma1(e)
272         ror     x13,x21,#28
273         add     x20,x20,x17                     // h+=Ch(e,f,g)
274         eor     x17,x21,x21,ror#5
275         add     x20,x20,x16                     // h+=Sigma1(e)
276         and     x19,x19,x28                     // (b^c)&=(a^b)
277         add     x24,x24,x20                     // d+=h
278         eor     x19,x19,x22                     // Maj(a,b,c)
279         eor     x17,x13,x17,ror#34      // Sigma0(a)
280         add     x20,x20,x19                     // h+=Maj(a,b,c)
281         ldr     x19,[x30],#8            // *K++, x28 in next round
282         //add   x20,x20,x17                     // h+=Sigma0(a)
283 #ifndef __AARCH64EB__
284         rev     x11,x11                 // 8
285 #endif
286         add     x20,x20,x17                     // h+=Sigma0(a)
287         ror     x16,x24,#14
288         add     x27,x27,x19                     // h+=K[i]
289         eor     x14,x24,x24,ror#23
290         and     x17,x25,x24
291         bic     x19,x26,x24
292         add     x27,x27,x11                     // h+=X[i]
293         orr     x17,x17,x19                     // Ch(e,f,g)
294         eor     x19,x20,x21                     // a^b, b^c in next round
295         eor     x16,x16,x14,ror#18      // Sigma1(e)
296         ror     x14,x20,#28
297         add     x27,x27,x17                     // h+=Ch(e,f,g)
298         eor     x17,x20,x20,ror#5
299         add     x27,x27,x16                     // h+=Sigma1(e)
300         and     x28,x28,x19                     // (b^c)&=(a^b)
301         add     x23,x23,x27                     // d+=h
302         eor     x28,x28,x21                     // Maj(a,b,c)
303         eor     x17,x14,x17,ror#34      // Sigma0(a)
304         add     x27,x27,x28                     // h+=Maj(a,b,c)
305         ldr     x28,[x30],#8            // *K++, x19 in next round
306         //add   x27,x27,x17                     // h+=Sigma0(a)
307 #ifndef __AARCH64EB__
308         rev     x12,x12                 // 9
309 #endif
310         ldp     x13,x14,[x1],#2*8
311         add     x27,x27,x17                     // h+=Sigma0(a)
312         ror     x16,x23,#14
313         add     x26,x26,x28                     // h+=K[i]
314         eor     x15,x23,x23,ror#23
315         and     x17,x24,x23
316         bic     x28,x25,x23
317         add     x26,x26,x12                     // h+=X[i]
318         orr     x17,x17,x28                     // Ch(e,f,g)
319         eor     x28,x27,x20                     // a^b, b^c in next round
320         eor     x16,x16,x15,ror#18      // Sigma1(e)
321         ror     x15,x27,#28
322         add     x26,x26,x17                     // h+=Ch(e,f,g)
323         eor     x17,x27,x27,ror#5
324         add     x26,x26,x16                     // h+=Sigma1(e)
325         and     x19,x19,x28                     // (b^c)&=(a^b)
326         add     x22,x22,x26                     // d+=h
327         eor     x19,x19,x20                     // Maj(a,b,c)
328         eor     x17,x15,x17,ror#34      // Sigma0(a)
329         add     x26,x26,x19                     // h+=Maj(a,b,c)
330         ldr     x19,[x30],#8            // *K++, x28 in next round
331         //add   x26,x26,x17                     // h+=Sigma0(a)
332 #ifndef __AARCH64EB__
333         rev     x13,x13                 // 10
334 #endif
335         add     x26,x26,x17                     // h+=Sigma0(a)
336         ror     x16,x22,#14
337         add     x25,x25,x19                     // h+=K[i]
338         eor     x0,x22,x22,ror#23
339         and     x17,x23,x22
340         bic     x19,x24,x22
341         add     x25,x25,x13                     // h+=X[i]
342         orr     x17,x17,x19                     // Ch(e,f,g)
343         eor     x19,x26,x27                     // a^b, b^c in next round
344         eor     x16,x16,x0,ror#18       // Sigma1(e)
345         ror     x0,x26,#28
346         add     x25,x25,x17                     // h+=Ch(e,f,g)
347         eor     x17,x26,x26,ror#5
348         add     x25,x25,x16                     // h+=Sigma1(e)
349         and     x28,x28,x19                     // (b^c)&=(a^b)
350         add     x21,x21,x25                     // d+=h
351         eor     x28,x28,x27                     // Maj(a,b,c)
352         eor     x17,x0,x17,ror#34       // Sigma0(a)
353         add     x25,x25,x28                     // h+=Maj(a,b,c)
354         ldr     x28,[x30],#8            // *K++, x19 in next round
355         //add   x25,x25,x17                     // h+=Sigma0(a)
356 #ifndef __AARCH64EB__
357         rev     x14,x14                 // 11
358 #endif
359         ldp     x15,x0,[x1],#2*8
360         add     x25,x25,x17                     // h+=Sigma0(a)
361         str     x6,[sp,#24]
362         ror     x16,x21,#14
363         add     x24,x24,x28                     // h+=K[i]
364         eor     x6,x21,x21,ror#23
365         and     x17,x22,x21
366         bic     x28,x23,x21
367         add     x24,x24,x14                     // h+=X[i]
368         orr     x17,x17,x28                     // Ch(e,f,g)
369         eor     x28,x25,x26                     // a^b, b^c in next round
370         eor     x16,x16,x6,ror#18       // Sigma1(e)
371         ror     x6,x25,#28
372         add     x24,x24,x17                     // h+=Ch(e,f,g)
373         eor     x17,x25,x25,ror#5
374         add     x24,x24,x16                     // h+=Sigma1(e)
375         and     x19,x19,x28                     // (b^c)&=(a^b)
376         add     x20,x20,x24                     // d+=h
377         eor     x19,x19,x26                     // Maj(a,b,c)
378         eor     x17,x6,x17,ror#34       // Sigma0(a)
379         add     x24,x24,x19                     // h+=Maj(a,b,c)
380         ldr     x19,[x30],#8            // *K++, x28 in next round
381         //add   x24,x24,x17                     // h+=Sigma0(a)
382 #ifndef __AARCH64EB__
383         rev     x15,x15                 // 12
384 #endif
385         add     x24,x24,x17                     // h+=Sigma0(a)
386         str     x7,[sp,#0]
387         ror     x16,x20,#14
388         add     x23,x23,x19                     // h+=K[i]
389         eor     x7,x20,x20,ror#23
390         and     x17,x21,x20
391         bic     x19,x22,x20
392         add     x23,x23,x15                     // h+=X[i]
393         orr     x17,x17,x19                     // Ch(e,f,g)
394         eor     x19,x24,x25                     // a^b, b^c in next round
395         eor     x16,x16,x7,ror#18       // Sigma1(e)
396         ror     x7,x24,#28
397         add     x23,x23,x17                     // h+=Ch(e,f,g)
398         eor     x17,x24,x24,ror#5
399         add     x23,x23,x16                     // h+=Sigma1(e)
400         and     x28,x28,x19                     // (b^c)&=(a^b)
401         add     x27,x27,x23                     // d+=h
402         eor     x28,x28,x25                     // Maj(a,b,c)
403         eor     x17,x7,x17,ror#34       // Sigma0(a)
404         add     x23,x23,x28                     // h+=Maj(a,b,c)
405         ldr     x28,[x30],#8            // *K++, x19 in next round
406         //add   x23,x23,x17                     // h+=Sigma0(a)
407 #ifndef __AARCH64EB__
408         rev     x0,x0                   // 13
409 #endif
410         ldp     x1,x2,[x1]
411         add     x23,x23,x17                     // h+=Sigma0(a)
412         str     x8,[sp,#8]
413         ror     x16,x27,#14
414         add     x22,x22,x28                     // h+=K[i]
415         eor     x8,x27,x27,ror#23
416         and     x17,x20,x27
417         bic     x28,x21,x27
418         add     x22,x22,x0                      // h+=X[i]
419         orr     x17,x17,x28                     // Ch(e,f,g)
420         eor     x28,x23,x24                     // a^b, b^c in next round
421         eor     x16,x16,x8,ror#18       // Sigma1(e)
422         ror     x8,x23,#28
423         add     x22,x22,x17                     // h+=Ch(e,f,g)
424         eor     x17,x23,x23,ror#5
425         add     x22,x22,x16                     // h+=Sigma1(e)
426         and     x19,x19,x28                     // (b^c)&=(a^b)
427         add     x26,x26,x22                     // d+=h
428         eor     x19,x19,x24                     // Maj(a,b,c)
429         eor     x17,x8,x17,ror#34       // Sigma0(a)
430         add     x22,x22,x19                     // h+=Maj(a,b,c)
431         ldr     x19,[x30],#8            // *K++, x28 in next round
432         //add   x22,x22,x17                     // h+=Sigma0(a)
433 #ifndef __AARCH64EB__
434         rev     x1,x1                   // 14
435 #endif
436         ldr     x6,[sp,#24]
437         add     x22,x22,x17                     // h+=Sigma0(a)
438         str     x9,[sp,#16]
439         ror     x16,x26,#14
440         add     x21,x21,x19                     // h+=K[i]
441         eor     x9,x26,x26,ror#23
442         and     x17,x27,x26
443         bic     x19,x20,x26
444         add     x21,x21,x1                      // h+=X[i]
445         orr     x17,x17,x19                     // Ch(e,f,g)
446         eor     x19,x22,x23                     // a^b, b^c in next round
447         eor     x16,x16,x9,ror#18       // Sigma1(e)
448         ror     x9,x22,#28
449         add     x21,x21,x17                     // h+=Ch(e,f,g)
450         eor     x17,x22,x22,ror#5
451         add     x21,x21,x16                     // h+=Sigma1(e)
452         and     x28,x28,x19                     // (b^c)&=(a^b)
453         add     x25,x25,x21                     // d+=h
454         eor     x28,x28,x23                     // Maj(a,b,c)
455         eor     x17,x9,x17,ror#34       // Sigma0(a)
456         add     x21,x21,x28                     // h+=Maj(a,b,c)
457         ldr     x28,[x30],#8            // *K++, x19 in next round
458         //add   x21,x21,x17                     // h+=Sigma0(a)
459 #ifndef __AARCH64EB__
460         rev     x2,x2                   // 15
461 #endif
462         ldr     x7,[sp,#0]
463         add     x21,x21,x17                     // h+=Sigma0(a)
464         str     x10,[sp,#24]
465         ror     x16,x25,#14
466         add     x20,x20,x28                     // h+=K[i]
467         ror     x9,x4,#1
468         and     x17,x26,x25
469         ror     x8,x1,#19
470         bic     x28,x27,x25
471         ror     x10,x21,#28
472         add     x20,x20,x2                      // h+=X[i]
473         eor     x16,x16,x25,ror#18
474         eor     x9,x9,x4,ror#8
475         orr     x17,x17,x28                     // Ch(e,f,g)
476         eor     x28,x21,x22                     // a^b, b^c in next round
477         eor     x16,x16,x25,ror#41      // Sigma1(e)
478         eor     x10,x10,x21,ror#34
479         add     x20,x20,x17                     // h+=Ch(e,f,g)
480         and     x19,x19,x28                     // (b^c)&=(a^b)
481         eor     x8,x8,x1,ror#61
482         eor     x9,x9,x4,lsr#7  // sigma0(X[i+1])
483         add     x20,x20,x16                     // h+=Sigma1(e)
484         eor     x19,x19,x22                     // Maj(a,b,c)
485         eor     x17,x10,x21,ror#39      // Sigma0(a)
486         eor     x8,x8,x1,lsr#6  // sigma1(X[i+14])
487         add     x3,x3,x12
488         add     x24,x24,x20                     // d+=h
489         add     x20,x20,x19                     // h+=Maj(a,b,c)
490         ldr     x19,[x30],#8            // *K++, x28 in next round
491         add     x3,x3,x9
492         add     x20,x20,x17                     // h+=Sigma0(a)
493         add     x3,x3,x8
494 .Loop_16_xx:
495         ldr     x8,[sp,#8]
496         str     x11,[sp,#0]
497         ror     x16,x24,#14
498         add     x27,x27,x19                     // h+=K[i]
499         ror     x10,x5,#1
500         and     x17,x25,x24
501         ror     x9,x2,#19
502         bic     x19,x26,x24
503         ror     x11,x20,#28
504         add     x27,x27,x3                      // h+=X[i]
505         eor     x16,x16,x24,ror#18
506         eor     x10,x10,x5,ror#8
507         orr     x17,x17,x19                     // Ch(e,f,g)
508         eor     x19,x20,x21                     // a^b, b^c in next round
509         eor     x16,x16,x24,ror#41      // Sigma1(e)
510         eor     x11,x11,x20,ror#34
511         add     x27,x27,x17                     // h+=Ch(e,f,g)
512         and     x28,x28,x19                     // (b^c)&=(a^b)
513         eor     x9,x9,x2,ror#61
514         eor     x10,x10,x5,lsr#7        // sigma0(X[i+1])
515         add     x27,x27,x16                     // h+=Sigma1(e)
516         eor     x28,x28,x21                     // Maj(a,b,c)
517         eor     x17,x11,x20,ror#39      // Sigma0(a)
518         eor     x9,x9,x2,lsr#6  // sigma1(X[i+14])
519         add     x4,x4,x13
520         add     x23,x23,x27                     // d+=h
521         add     x27,x27,x28                     // h+=Maj(a,b,c)
522         ldr     x28,[x30],#8            // *K++, x19 in next round
523         add     x4,x4,x10
524         add     x27,x27,x17                     // h+=Sigma0(a)
525         add     x4,x4,x9
526         ldr     x9,[sp,#16]
527         str     x12,[sp,#8]
528         ror     x16,x23,#14
529         add     x26,x26,x28                     // h+=K[i]
530         ror     x11,x6,#1
531         and     x17,x24,x23
532         ror     x10,x3,#19
533         bic     x28,x25,x23
534         ror     x12,x27,#28
535         add     x26,x26,x4                      // h+=X[i]
536         eor     x16,x16,x23,ror#18
537         eor     x11,x11,x6,ror#8
538         orr     x17,x17,x28                     // Ch(e,f,g)
539         eor     x28,x27,x20                     // a^b, b^c in next round
540         eor     x16,x16,x23,ror#41      // Sigma1(e)
541         eor     x12,x12,x27,ror#34
542         add     x26,x26,x17                     // h+=Ch(e,f,g)
543         and     x19,x19,x28                     // (b^c)&=(a^b)
544         eor     x10,x10,x3,ror#61
545         eor     x11,x11,x6,lsr#7        // sigma0(X[i+1])
546         add     x26,x26,x16                     // h+=Sigma1(e)
547         eor     x19,x19,x20                     // Maj(a,b,c)
548         eor     x17,x12,x27,ror#39      // Sigma0(a)
549         eor     x10,x10,x3,lsr#6        // sigma1(X[i+14])
550         add     x5,x5,x14
551         add     x22,x22,x26                     // d+=h
552         add     x26,x26,x19                     // h+=Maj(a,b,c)
553         ldr     x19,[x30],#8            // *K++, x28 in next round
554         add     x5,x5,x11
555         add     x26,x26,x17                     // h+=Sigma0(a)
556         add     x5,x5,x10
557         ldr     x10,[sp,#24]
558         str     x13,[sp,#16]
559         ror     x16,x22,#14
560         add     x25,x25,x19                     // h+=K[i]
561         ror     x12,x7,#1
562         and     x17,x23,x22
563         ror     x11,x4,#19
564         bic     x19,x24,x22
565         ror     x13,x26,#28
566         add     x25,x25,x5                      // h+=X[i]
567         eor     x16,x16,x22,ror#18
568         eor     x12,x12,x7,ror#8
569         orr     x17,x17,x19                     // Ch(e,f,g)
570         eor     x19,x26,x27                     // a^b, b^c in next round
571         eor     x16,x16,x22,ror#41      // Sigma1(e)
572         eor     x13,x13,x26,ror#34
573         add     x25,x25,x17                     // h+=Ch(e,f,g)
574         and     x28,x28,x19                     // (b^c)&=(a^b)
575         eor     x11,x11,x4,ror#61
576         eor     x12,x12,x7,lsr#7        // sigma0(X[i+1])
577         add     x25,x25,x16                     // h+=Sigma1(e)
578         eor     x28,x28,x27                     // Maj(a,b,c)
579         eor     x17,x13,x26,ror#39      // Sigma0(a)
580         eor     x11,x11,x4,lsr#6        // sigma1(X[i+14])
581         add     x6,x6,x15
582         add     x21,x21,x25                     // d+=h
583         add     x25,x25,x28                     // h+=Maj(a,b,c)
584         ldr     x28,[x30],#8            // *K++, x19 in next round
585         add     x6,x6,x12
586         add     x25,x25,x17                     // h+=Sigma0(a)
587         add     x6,x6,x11
588         ldr     x11,[sp,#0]
589         str     x14,[sp,#24]
590         ror     x16,x21,#14
591         add     x24,x24,x28                     // h+=K[i]
592         ror     x13,x8,#1
593         and     x17,x22,x21
594         ror     x12,x5,#19
595         bic     x28,x23,x21
596         ror     x14,x25,#28
597         add     x24,x24,x6                      // h+=X[i]
598         eor     x16,x16,x21,ror#18
599         eor     x13,x13,x8,ror#8
600         orr     x17,x17,x28                     // Ch(e,f,g)
601         eor     x28,x25,x26                     // a^b, b^c in next round
602         eor     x16,x16,x21,ror#41      // Sigma1(e)
603         eor     x14,x14,x25,ror#34
604         add     x24,x24,x17                     // h+=Ch(e,f,g)
605         and     x19,x19,x28                     // (b^c)&=(a^b)
606         eor     x12,x12,x5,ror#61
607         eor     x13,x13,x8,lsr#7        // sigma0(X[i+1])
608         add     x24,x24,x16                     // h+=Sigma1(e)
609         eor     x19,x19,x26                     // Maj(a,b,c)
610         eor     x17,x14,x25,ror#39      // Sigma0(a)
611         eor     x12,x12,x5,lsr#6        // sigma1(X[i+14])
612         add     x7,x7,x0
613         add     x20,x20,x24                     // d+=h
614         add     x24,x24,x19                     // h+=Maj(a,b,c)
615         ldr     x19,[x30],#8            // *K++, x28 in next round
616         add     x7,x7,x13
617         add     x24,x24,x17                     // h+=Sigma0(a)
618         add     x7,x7,x12
619         ldr     x12,[sp,#8]
620         str     x15,[sp,#0]
621         ror     x16,x20,#14
622         add     x23,x23,x19                     // h+=K[i]
623         ror     x14,x9,#1
624         and     x17,x21,x20
625         ror     x13,x6,#19
626         bic     x19,x22,x20
627         ror     x15,x24,#28
628         add     x23,x23,x7                      // h+=X[i]
629         eor     x16,x16,x20,ror#18
630         eor     x14,x14,x9,ror#8
631         orr     x17,x17,x19                     // Ch(e,f,g)
632         eor     x19,x24,x25                     // a^b, b^c in next round
633         eor     x16,x16,x20,ror#41      // Sigma1(e)
634         eor     x15,x15,x24,ror#34
635         add     x23,x23,x17                     // h+=Ch(e,f,g)
636         and     x28,x28,x19                     // (b^c)&=(a^b)
637         eor     x13,x13,x6,ror#61
638         eor     x14,x14,x9,lsr#7        // sigma0(X[i+1])
639         add     x23,x23,x16                     // h+=Sigma1(e)
640         eor     x28,x28,x25                     // Maj(a,b,c)
641         eor     x17,x15,x24,ror#39      // Sigma0(a)
642         eor     x13,x13,x6,lsr#6        // sigma1(X[i+14])
643         add     x8,x8,x1
644         add     x27,x27,x23                     // d+=h
645         add     x23,x23,x28                     // h+=Maj(a,b,c)
646         ldr     x28,[x30],#8            // *K++, x19 in next round
647         add     x8,x8,x14
648         add     x23,x23,x17                     // h+=Sigma0(a)
649         add     x8,x8,x13
650         ldr     x13,[sp,#16]
651         str     x0,[sp,#8]
652         ror     x16,x27,#14
653         add     x22,x22,x28                     // h+=K[i]
654         ror     x15,x10,#1
655         and     x17,x20,x27
656         ror     x14,x7,#19
657         bic     x28,x21,x27
658         ror     x0,x23,#28
659         add     x22,x22,x8                      // h+=X[i]
660         eor     x16,x16,x27,ror#18
661         eor     x15,x15,x10,ror#8
662         orr     x17,x17,x28                     // Ch(e,f,g)
663         eor     x28,x23,x24                     // a^b, b^c in next round
664         eor     x16,x16,x27,ror#41      // Sigma1(e)
665         eor     x0,x0,x23,ror#34
666         add     x22,x22,x17                     // h+=Ch(e,f,g)
667         and     x19,x19,x28                     // (b^c)&=(a^b)
668         eor     x14,x14,x7,ror#61
669         eor     x15,x15,x10,lsr#7       // sigma0(X[i+1])
670         add     x22,x22,x16                     // h+=Sigma1(e)
671         eor     x19,x19,x24                     // Maj(a,b,c)
672         eor     x17,x0,x23,ror#39       // Sigma0(a)
673         eor     x14,x14,x7,lsr#6        // sigma1(X[i+14])
674         add     x9,x9,x2
675         add     x26,x26,x22                     // d+=h
676         add     x22,x22,x19                     // h+=Maj(a,b,c)
677         ldr     x19,[x30],#8            // *K++, x28 in next round
678         add     x9,x9,x15
679         add     x22,x22,x17                     // h+=Sigma0(a)
680         add     x9,x9,x14
681         ldr     x14,[sp,#24]
682         str     x1,[sp,#16]
683         ror     x16,x26,#14
684         add     x21,x21,x19                     // h+=K[i]
685         ror     x0,x11,#1
686         and     x17,x27,x26
687         ror     x15,x8,#19
688         bic     x19,x20,x26
689         ror     x1,x22,#28
690         add     x21,x21,x9                      // h+=X[i]
691         eor     x16,x16,x26,ror#18
692         eor     x0,x0,x11,ror#8
693         orr     x17,x17,x19                     // Ch(e,f,g)
694         eor     x19,x22,x23                     // a^b, b^c in next round
695         eor     x16,x16,x26,ror#41      // Sigma1(e)
696         eor     x1,x1,x22,ror#34
697         add     x21,x21,x17                     // h+=Ch(e,f,g)
698         and     x28,x28,x19                     // (b^c)&=(a^b)
699         eor     x15,x15,x8,ror#61
700         eor     x0,x0,x11,lsr#7 // sigma0(X[i+1])
701         add     x21,x21,x16                     // h+=Sigma1(e)
702         eor     x28,x28,x23                     // Maj(a,b,c)
703         eor     x17,x1,x22,ror#39       // Sigma0(a)
704         eor     x15,x15,x8,lsr#6        // sigma1(X[i+14])
705         add     x10,x10,x3
706         add     x25,x25,x21                     // d+=h
707         add     x21,x21,x28                     // h+=Maj(a,b,c)
708         ldr     x28,[x30],#8            // *K++, x19 in next round
709         add     x10,x10,x0
710         add     x21,x21,x17                     // h+=Sigma0(a)
711         add     x10,x10,x15
712         ldr     x15,[sp,#0]
713         str     x2,[sp,#24]
714         ror     x16,x25,#14
715         add     x20,x20,x28                     // h+=K[i]
716         ror     x1,x12,#1
717         and     x17,x26,x25
718         ror     x0,x9,#19
719         bic     x28,x27,x25
720         ror     x2,x21,#28
721         add     x20,x20,x10                     // h+=X[i]
722         eor     x16,x16,x25,ror#18
723         eor     x1,x1,x12,ror#8
724         orr     x17,x17,x28                     // Ch(e,f,g)
725         eor     x28,x21,x22                     // a^b, b^c in next round
726         eor     x16,x16,x25,ror#41      // Sigma1(e)
727         eor     x2,x2,x21,ror#34
728         add     x20,x20,x17                     // h+=Ch(e,f,g)
729         and     x19,x19,x28                     // (b^c)&=(a^b)
730         eor     x0,x0,x9,ror#61
731         eor     x1,x1,x12,lsr#7 // sigma0(X[i+1])
732         add     x20,x20,x16                     // h+=Sigma1(e)
733         eor     x19,x19,x22                     // Maj(a,b,c)
734         eor     x17,x2,x21,ror#39       // Sigma0(a)
735         eor     x0,x0,x9,lsr#6  // sigma1(X[i+14])
736         add     x11,x11,x4
737         add     x24,x24,x20                     // d+=h
738         add     x20,x20,x19                     // h+=Maj(a,b,c)
739         ldr     x19,[x30],#8            // *K++, x28 in next round
740         add     x11,x11,x1
741         add     x20,x20,x17                     // h+=Sigma0(a)
742         add     x11,x11,x0
743         ldr     x0,[sp,#8]
744         str     x3,[sp,#0]
745         ror     x16,x24,#14
746         add     x27,x27,x19                     // h+=K[i]
747         ror     x2,x13,#1
748         and     x17,x25,x24
749         ror     x1,x10,#19
750         bic     x19,x26,x24
751         ror     x3,x20,#28
752         add     x27,x27,x11                     // h+=X[i]
753         eor     x16,x16,x24,ror#18
754         eor     x2,x2,x13,ror#8
755         orr     x17,x17,x19                     // Ch(e,f,g)
756         eor     x19,x20,x21                     // a^b, b^c in next round
757         eor     x16,x16,x24,ror#41      // Sigma1(e)
758         eor     x3,x3,x20,ror#34
759         add     x27,x27,x17                     // h+=Ch(e,f,g)
760         and     x28,x28,x19                     // (b^c)&=(a^b)
761         eor     x1,x1,x10,ror#61
762         eor     x2,x2,x13,lsr#7 // sigma0(X[i+1])
763         add     x27,x27,x16                     // h+=Sigma1(e)
764         eor     x28,x28,x21                     // Maj(a,b,c)
765         eor     x17,x3,x20,ror#39       // Sigma0(a)
766         eor     x1,x1,x10,lsr#6 // sigma1(X[i+14])
767         add     x12,x12,x5
768         add     x23,x23,x27                     // d+=h
769         add     x27,x27,x28                     // h+=Maj(a,b,c)
770         ldr     x28,[x30],#8            // *K++, x19 in next round
771         add     x12,x12,x2
772         add     x27,x27,x17                     // h+=Sigma0(a)
773         add     x12,x12,x1
774         ldr     x1,[sp,#16]
775         str     x4,[sp,#8]
776         ror     x16,x23,#14
777         add     x26,x26,x28                     // h+=K[i]
778         ror     x3,x14,#1
779         and     x17,x24,x23
780         ror     x2,x11,#19
781         bic     x28,x25,x23
782         ror     x4,x27,#28
783         add     x26,x26,x12                     // h+=X[i]
784         eor     x16,x16,x23,ror#18
785         eor     x3,x3,x14,ror#8
786         orr     x17,x17,x28                     // Ch(e,f,g)
787         eor     x28,x27,x20                     // a^b, b^c in next round
788         eor     x16,x16,x23,ror#41      // Sigma1(e)
789         eor     x4,x4,x27,ror#34
790         add     x26,x26,x17                     // h+=Ch(e,f,g)
791         and     x19,x19,x28                     // (b^c)&=(a^b)
792         eor     x2,x2,x11,ror#61
793         eor     x3,x3,x14,lsr#7 // sigma0(X[i+1])
794         add     x26,x26,x16                     // h+=Sigma1(e)
795         eor     x19,x19,x20                     // Maj(a,b,c)
796         eor     x17,x4,x27,ror#39       // Sigma0(a)
797         eor     x2,x2,x11,lsr#6 // sigma1(X[i+14])
798         add     x13,x13,x6
799         add     x22,x22,x26                     // d+=h
800         add     x26,x26,x19                     // h+=Maj(a,b,c)
801         ldr     x19,[x30],#8            // *K++, x28 in next round
802         add     x13,x13,x3
803         add     x26,x26,x17                     // h+=Sigma0(a)
804         add     x13,x13,x2
805         ldr     x2,[sp,#24]
806         str     x5,[sp,#16]
807         ror     x16,x22,#14
808         add     x25,x25,x19                     // h+=K[i]
809         ror     x4,x15,#1
810         and     x17,x23,x22
811         ror     x3,x12,#19
812         bic     x19,x24,x22
813         ror     x5,x26,#28
814         add     x25,x25,x13                     // h+=X[i]
815         eor     x16,x16,x22,ror#18
816         eor     x4,x4,x15,ror#8
817         orr     x17,x17,x19                     // Ch(e,f,g)
818         eor     x19,x26,x27                     // a^b, b^c in next round
819         eor     x16,x16,x22,ror#41      // Sigma1(e)
820         eor     x5,x5,x26,ror#34
821         add     x25,x25,x17                     // h+=Ch(e,f,g)
822         and     x28,x28,x19                     // (b^c)&=(a^b)
823         eor     x3,x3,x12,ror#61
824         eor     x4,x4,x15,lsr#7 // sigma0(X[i+1])
825         add     x25,x25,x16                     // h+=Sigma1(e)
826         eor     x28,x28,x27                     // Maj(a,b,c)
827         eor     x17,x5,x26,ror#39       // Sigma0(a)
828         eor     x3,x3,x12,lsr#6 // sigma1(X[i+14])
829         add     x14,x14,x7
830         add     x21,x21,x25                     // d+=h
831         add     x25,x25,x28                     // h+=Maj(a,b,c)
832         ldr     x28,[x30],#8            // *K++, x19 in next round
833         add     x14,x14,x4
834         add     x25,x25,x17                     // h+=Sigma0(a)
835         add     x14,x14,x3
836         ldr     x3,[sp,#0]
837         str     x6,[sp,#24]
838         ror     x16,x21,#14
839         add     x24,x24,x28                     // h+=K[i]
840         ror     x5,x0,#1
841         and     x17,x22,x21
842         ror     x4,x13,#19
843         bic     x28,x23,x21
844         ror     x6,x25,#28
845         add     x24,x24,x14                     // h+=X[i]
846         eor     x16,x16,x21,ror#18
847         eor     x5,x5,x0,ror#8
848         orr     x17,x17,x28                     // Ch(e,f,g)
849         eor     x28,x25,x26                     // a^b, b^c in next round
850         eor     x16,x16,x21,ror#41      // Sigma1(e)
851         eor     x6,x6,x25,ror#34
852         add     x24,x24,x17                     // h+=Ch(e,f,g)
853         and     x19,x19,x28                     // (b^c)&=(a^b)
854         eor     x4,x4,x13,ror#61
855         eor     x5,x5,x0,lsr#7  // sigma0(X[i+1])
856         add     x24,x24,x16                     // h+=Sigma1(e)
857         eor     x19,x19,x26                     // Maj(a,b,c)
858         eor     x17,x6,x25,ror#39       // Sigma0(a)
859         eor     x4,x4,x13,lsr#6 // sigma1(X[i+14])
860         add     x15,x15,x8
861         add     x20,x20,x24                     // d+=h
862         add     x24,x24,x19                     // h+=Maj(a,b,c)
863         ldr     x19,[x30],#8            // *K++, x28 in next round
864         add     x15,x15,x5
865         add     x24,x24,x17                     // h+=Sigma0(a)
866         add     x15,x15,x4
867         ldr     x4,[sp,#8]
868         str     x7,[sp,#0]
869         ror     x16,x20,#14
870         add     x23,x23,x19                     // h+=K[i]
871         ror     x6,x1,#1
872         and     x17,x21,x20
873         ror     x5,x14,#19
874         bic     x19,x22,x20
875         ror     x7,x24,#28
876         add     x23,x23,x15                     // h+=X[i]
877         eor     x16,x16,x20,ror#18
878         eor     x6,x6,x1,ror#8
879         orr     x17,x17,x19                     // Ch(e,f,g)
880         eor     x19,x24,x25                     // a^b, b^c in next round
881         eor     x16,x16,x20,ror#41      // Sigma1(e)
882         eor     x7,x7,x24,ror#34
883         add     x23,x23,x17                     // h+=Ch(e,f,g)
884         and     x28,x28,x19                     // (b^c)&=(a^b)
885         eor     x5,x5,x14,ror#61
886         eor     x6,x6,x1,lsr#7  // sigma0(X[i+1])
887         add     x23,x23,x16                     // h+=Sigma1(e)
888         eor     x28,x28,x25                     // Maj(a,b,c)
889         eor     x17,x7,x24,ror#39       // Sigma0(a)
890         eor     x5,x5,x14,lsr#6 // sigma1(X[i+14])
891         add     x0,x0,x9
892         add     x27,x27,x23                     // d+=h
893         add     x23,x23,x28                     // h+=Maj(a,b,c)
894         ldr     x28,[x30],#8            // *K++, x19 in next round
895         add     x0,x0,x6
896         add     x23,x23,x17                     // h+=Sigma0(a)
897         add     x0,x0,x5
898         ldr     x5,[sp,#16]
899         str     x8,[sp,#8]
900         ror     x16,x27,#14
901         add     x22,x22,x28                     // h+=K[i]
902         ror     x7,x2,#1
903         and     x17,x20,x27
904         ror     x6,x15,#19
905         bic     x28,x21,x27
906         ror     x8,x23,#28
907         add     x22,x22,x0                      // h+=X[i]
908         eor     x16,x16,x27,ror#18
909         eor     x7,x7,x2,ror#8
910         orr     x17,x17,x28                     // Ch(e,f,g)
911         eor     x28,x23,x24                     // a^b, b^c in next round
912         eor     x16,x16,x27,ror#41      // Sigma1(e)
913         eor     x8,x8,x23,ror#34
914         add     x22,x22,x17                     // h+=Ch(e,f,g)
915         and     x19,x19,x28                     // (b^c)&=(a^b)
916         eor     x6,x6,x15,ror#61
917         eor     x7,x7,x2,lsr#7  // sigma0(X[i+1])
918         add     x22,x22,x16                     // h+=Sigma1(e)
919         eor     x19,x19,x24                     // Maj(a,b,c)
920         eor     x17,x8,x23,ror#39       // Sigma0(a)
921         eor     x6,x6,x15,lsr#6 // sigma1(X[i+14])
922         add     x1,x1,x10
923         add     x26,x26,x22                     // d+=h
924         add     x22,x22,x19                     // h+=Maj(a,b,c)
925         ldr     x19,[x30],#8            // *K++, x28 in next round
926         add     x1,x1,x7
927         add     x22,x22,x17                     // h+=Sigma0(a)
928         add     x1,x1,x6
929         ldr     x6,[sp,#24]
930         str     x9,[sp,#16]
931         ror     x16,x26,#14
932         add     x21,x21,x19                     // h+=K[i]
933         ror     x8,x3,#1
934         and     x17,x27,x26
935         ror     x7,x0,#19
936         bic     x19,x20,x26
937         ror     x9,x22,#28
938         add     x21,x21,x1                      // h+=X[i]
939         eor     x16,x16,x26,ror#18
940         eor     x8,x8,x3,ror#8
941         orr     x17,x17,x19                     // Ch(e,f,g)
942         eor     x19,x22,x23                     // a^b, b^c in next round
943         eor     x16,x16,x26,ror#41      // Sigma1(e)
944         eor     x9,x9,x22,ror#34
945         add     x21,x21,x17                     // h+=Ch(e,f,g)
946         and     x28,x28,x19                     // (b^c)&=(a^b)
947         eor     x7,x7,x0,ror#61
948         eor     x8,x8,x3,lsr#7  // sigma0(X[i+1])
949         add     x21,x21,x16                     // h+=Sigma1(e)
950         eor     x28,x28,x23                     // Maj(a,b,c)
951         eor     x17,x9,x22,ror#39       // Sigma0(a)
952         eor     x7,x7,x0,lsr#6  // sigma1(X[i+14])
953         add     x2,x2,x11
954         add     x25,x25,x21                     // d+=h
955         add     x21,x21,x28                     // h+=Maj(a,b,c)
956         ldr     x28,[x30],#8            // *K++, x19 in next round
957         add     x2,x2,x8
958         add     x21,x21,x17                     // h+=Sigma0(a)
959         add     x2,x2,x7
960         ldr     x7,[sp,#0]
961         str     x10,[sp,#24]
962         ror     x16,x25,#14
963         add     x20,x20,x28                     // h+=K[i]
964         ror     x9,x4,#1
965         and     x17,x26,x25
966         ror     x8,x1,#19
967         bic     x28,x27,x25
968         ror     x10,x21,#28
969         add     x20,x20,x2                      // h+=X[i]
970         eor     x16,x16,x25,ror#18
971         eor     x9,x9,x4,ror#8
972         orr     x17,x17,x28                     // Ch(e,f,g)
973         eor     x28,x21,x22                     // a^b, b^c in next round
974         eor     x16,x16,x25,ror#41      // Sigma1(e)
975         eor     x10,x10,x21,ror#34
976         add     x20,x20,x17                     // h+=Ch(e,f,g)
977         and     x19,x19,x28                     // (b^c)&=(a^b)
978         eor     x8,x8,x1,ror#61
979         eor     x9,x9,x4,lsr#7  // sigma0(X[i+1])
980         add     x20,x20,x16                     // h+=Sigma1(e)
981         eor     x19,x19,x22                     // Maj(a,b,c)
982         eor     x17,x10,x21,ror#39      // Sigma0(a)
983         eor     x8,x8,x1,lsr#6  // sigma1(X[i+14])
984         add     x3,x3,x12
985         add     x24,x24,x20                     // d+=h
986         add     x20,x20,x19                     // h+=Maj(a,b,c)
987         ldr     x19,[x30],#8            // *K++, x28 in next round
988         add     x3,x3,x9
989         add     x20,x20,x17                     // h+=Sigma0(a)
990         add     x3,x3,x8
991         cbnz    x19,.Loop_16_xx
992
993         ldp     x0,x2,[x29,#96]
994         ldr     x1,[x29,#112]
995         sub     x30,x30,#648            // rewind
996
997         ldp     x3,x4,[x0]
998         ldp     x5,x6,[x0,#2*8]
999         add     x1,x1,#14*8                     // advance input pointer
1000         ldp     x7,x8,[x0,#4*8]
1001         add     x20,x20,x3
1002         ldp     x9,x10,[x0,#6*8]
1003         add     x21,x21,x4
1004         add     x22,x22,x5
1005         add     x23,x23,x6
1006         stp     x20,x21,[x0]
1007         add     x24,x24,x7
1008         add     x25,x25,x8
1009         stp     x22,x23,[x0,#2*8]
1010         add     x26,x26,x9
1011         add     x27,x27,x10
1012         cmp     x1,x2
1013         stp     x24,x25,[x0,#4*8]
1014         stp     x26,x27,[x0,#6*8]
1015         b.ne    .Loop
1016
1017         ldp     x19,x20,[x29,#16]
1018         add     sp,sp,#4*8
1019         ldp     x21,x22,[x29,#32]
1020         ldp     x23,x24,[x29,#48]
1021         ldp     x25,x26,[x29,#64]
1022         ldp     x27,x28,[x29,#80]
1023         ldp     x29,x30,[sp],#128
1024         ret
1025 .size   sha512_block_data_order,.-sha512_block_data_order
1026
// .LK512 - table of the 80 SHA-512 round constants K[0..79], two 64-bit
// words per .quad line, followed by a single zero word.
//
// Layout contract with the round code in this file:
//   * each round fetches one word with a post-incremented load
//     (ldr ...,[x30],#8, commented "*K++"); presumably x30 is pointed at
//     .LK512 before the visible part of the function - confirm there;
//   * the word fetched at the end of a 16-round pass is tested with cbnz,
//     so the trailing zero word is what ends .Loop_16_xx;
//   * afterwards the pointer is rewound by 648 bytes = 80*8 (constants)
//     + 8 (terminator).  Adding or removing an entry requires updating
//     that immediate as well.
//
// .align 6 requests 2^6 = 64-byte alignment (the AArch64 GNU assembler
// takes the operand as a power of two).
1027 .align  6
1028 .type   .LK512,%object
1029 .LK512:
// K[0] .. K[15]
1030         .quad   0x428a2f98d728ae22,0x7137449123ef65cd
1031         .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1032         .quad   0x3956c25bf348b538,0x59f111f1b605d019
1033         .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
1034         .quad   0xd807aa98a3030242,0x12835b0145706fbe
1035         .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1036         .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
1037         .quad   0x9bdc06a725c71235,0xc19bf174cf692694
// K[16] .. K[31]
1038         .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
1039         .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1040         .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
1041         .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1042         .quad   0x983e5152ee66dfab,0xa831c66d2db43210
1043         .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
1044         .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
1045         .quad   0x06ca6351e003826f,0x142929670a0e6e70
// K[32] .. K[47]
1046         .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
1047         .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1048         .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
1049         .quad   0x81c2c92e47edaee6,0x92722c851482353b
1050         .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
1051         .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
1052         .quad   0xd192e819d6ef5218,0xd69906245565a910
1053         .quad   0xf40e35855771202a,0x106aa07032bbd1b8
// K[48] .. K[63]
1054         .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
1055         .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1056         .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1057         .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1058         .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
1059         .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
1060         .quad   0x90befffa23631e28,0xa4506cebde82bde9
1061         .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
// K[64] .. K[79]
1062         .quad   0xca273eceea26619c,0xd186b8c721c0c207
1063         .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1064         .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
1065         .quad   0x113f9804bef90dae,0x1b710b35131c471b
1066         .quad   0x28db77f523047d84,0x32caab7b40c72493
1067         .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1068         .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1069         .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
1070         .quad   0       // terminator: zero word that the cbnz ending .Loop_16_xx tests for
// Record the object size (81 words = 648 bytes) in the symbol table.
1071 .size   .LK512,.-.LK512
// Trailer: capability-word locator, identification string and the
// capability symbol itself.  Everything that mentions OPENSSL_armcap_P is
// compiled only when __KERNEL__ is not defined.
//
// .LOPENSSL_armcap_P holds the distance from this location to
// OPENSSL_armcap_P ("OPENSSL_armcap_P-."), emitted as a 32-bit word under
// __ILP32__ and as a 64-bit word otherwise; .align 3 gives it 8-byte
// alignment.  NOTE(review): presumably the function entry adds this
// offset to its own address to reach the capability word without an
// absolute relocation - that code is outside this chunk, confirm there.
1072 #ifndef __KERNEL__
1073 .align  3
1074 .LOPENSSL_armcap_P:
1075 # ifdef __ILP32__
1076         .long   OPENSSL_armcap_P-.
1077 # else
1078         .quad   OPENSSL_armcap_P-.
1079 # endif
1080 #endif
// NUL-terminated identification string embedded in the object file.
1081 .asciz  "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
// Restore 4-byte (2^2) alignment after the variable-length string.
1082 .align  2
// Declare OPENSSL_armcap_P as a 4-byte common symbol; the third operand
// is the requested alignment (4 bytes in ELF gas - confirm for other
// object formats).
1083 #ifndef __KERNEL__
1084 .comm   OPENSSL_armcap_P,4,4
1085 #endif