
Text file src/crypto/aes/asm_arm64.s


// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

DATA rotInvSRows<>+0x00(SB)/8, $0x080f0205040b0e01
DATA rotInvSRows<>+0x08(SB)/8, $0x00070a0d0c030609
GLOBL rotInvSRows<>(SB), (NOPTR+RODATA), $16
DATA invSRows<>+0x00(SB)/8, $0x0b0e0104070a0d00
DATA invSRows<>+0x08(SB)/8, $0x0306090c0f020508
GLOBL invSRows<>(SB), (NOPTR+RODATA), $16
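
// rotInvSRows combines RotWord with the inverse ShiftRows permutation;
// invSRows is the inverse ShiftRows permutation alone. Applying one of
// these with TBL before AESE on an all-zero round key cancels the
// ShiftRows step inside AESE, leaving exactly the SubWord (optionally
// with RotWord) that the key schedule needs.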

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9
	MOVD	xk+8(FP), R10
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]

	CMP	$12, R9
	BLT	enc128
	BEQ	enc192
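	// nr is 10, 12, or 14 for AES-128, AES-192, or AES-256. AES-256 runs
	// two extra rounds before falling through to the AES-192 rounds,
	// which in turn fall through to the common AES-128 body.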
enc256:
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESE	V1.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc192:
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESE	V3.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V4.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc128:
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESE	V5.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V6.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V7.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V10.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V11.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V12.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V13.B16, V0.B16
	AESMC	V0.B16, V0.B16
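	// Final round: AES omits MixColumns in the last round, so the last
	// AESE is not followed by AESMC and the final round key is applied
	// with a plain XOR.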
	AESE	V14.B16, V0.B16
	VEOR	V0.B16, V15.B16, V0.B16
	VST1	[V0.B16], (R11)
	RET

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9
	MOVD	xk+8(FP), R10
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]

	CMP	$12, R9
	BLT	dec128
	BEQ	dec192
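	// Decryption uses the equivalent inverse cipher: the round keys were
	// already passed through AESIMC (and reversed) by expandKeyAsm, so
	// they are consumed here in the same forward order as in encryption.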
dec256:
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESD	V1.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V2.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec192:
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESD	V3.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V4.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec128:
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESD	V5.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V6.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V7.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V8.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V9.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V10.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V11.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V12.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V13.B16, V0.B16
	AESIMC	V0.B16, V0.B16
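	// Final round: no AESIMC after the last AESD; the last round key is
	// applied with a plain XOR, mirroring the encryption path.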
	AESD	V14.B16, V0.B16
	VEOR	V0.B16, V15.B16, V0.B16
	VST1	[V0.B16], (R11)
	RET

// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
// Note that round keys are stored in uint128 format, not uint32.
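// enc and dec each receive 4*(nr+1) uint32 words, one 128-bit round key
// for each of the nr+1 AddRoundKey steps.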
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R8
	MOVD	key+8(FP), R9
	MOVD	enc+16(FP), R10
	MOVD	dec+24(FP), R11
	LDP	rotInvSRows<>(SB), (R0, R1)
	VMOV	R0, V3.D[0]
	VMOV	R1, V3.D[1]
	VEOR	V0.B16, V0.B16, V0.B16 // All zeroes
	MOVW	$1, R13
	TBZ	$1, R8, ks192
	TBNZ	$2, R8, ks256
	LDPW	(R9), (R4, R5)
	LDPW	8(R9), (R6, R7)
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	MOVW	$0x1b, R14
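	// Each iteration computes SubWord(RotWord(w)) for the previous word
	// w: TBL applies rotInvSRows, and AESE with the all-zero key in V0
	// then performs ShiftRows+SubBytes, so the ShiftRows steps cancel
	// and only the S-box substitution remains.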
ks128Loop:
		VMOV	R7, V2.S[0]
		WORD	$0x4E030042       // TBL V3.B16, [V2.B16], V2.B16
		AESE	V0.B16, V2.B16    // Use AES to compute the SBOX
		EORW	R13, R4
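		// Advance Rcon by doubling in GF(2^8): the only doubling
		// that overflows eight bits is 0x80<<1 = 0x100, which reduces
		// mod the AES polynomial 0x11b to the 0x1b held in R14.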
		LSLW	$1, R13           // Compute next Rcon
		ANDSW	$0x100, R13, ZR
		CSELW	NE, R14, R13, R13 // Fake modulo
		SUBS	$1, R8
		VMOV	V2.S[0], R0
		EORW	R0, R4
		EORW	R4, R5
		EORW	R5, R6
		EORW	R6, R7
		STPW.P	(R4, R5), 8(R10)
		STPW.P	(R6, R7), 8(R10)
	BNE	ks128Loop
	CBZ	R11, ksDone       // If dec is nil we are done
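	// Rewind R10 to the start of the 11 stored round keys (11*16 = 176 bytes).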
	SUB	$176, R10
	// Decryption keys are encryption keys with InverseMixColumns applied
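	// and stored in reverse order, with the first and last round keys
	// copied unchanged.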
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V14.B16
	AESIMC	V1.B16, V13.B16
	VMOV	V2.B16, V12.B16
	VST1.P	[V12.B16, V13.B16, V14.B16], 48(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
ks192:
	LDPW	(R9), (R2, R3)
	LDPW	8(R9), (R4, R5)
	LDPW	16(R9), (R6, R7)
	STPW.P	(R2, R3), 8(R10)
	STPW.P	(R4, R5), 8(R10)
	SUB	$4, R8
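	// Each iteration below produces six words of the schedule; eight
	// iterations plus the four words stored above yield the 52 words
	// (13 round keys) that AES-192 needs.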
ks192Loop:
		STPW.P	(R6, R7), 8(R10)
		VMOV	R7, V2.S[0]
		WORD	$0x4E030042 // TBL V3.B16, [V2.B16], V2.B16
		AESE	V0.B16, V2.B16
		EORW	R13, R2
		LSLW	$1, R13
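		// No Rcon reduction needed: the eight Rcon values AES-192
		// uses stop at 0x80.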
		SUBS	$1, R8
		VMOV	V2.S[0], R0
		EORW	R0, R2
		EORW	R2, R3
		EORW	R3, R4
		EORW	R4, R5
		EORW	R5, R6
		EORW	R6, R7
		STPW.P	(R2, R3), 8(R10)
		STPW.P	(R4, R5), 8(R10)
	BNE	ks192Loop
	CBZ	R11, ksDone
	SUB	$208, R10
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16]
	VST1.P	[V0.B16], 16(R11)
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
ks256:
	LDP	invSRows<>(SB), (R0, R1)
	VMOV	R0, V4.D[0]
	VMOV	R1, V4.D[1]
	LDPW	(R9), (R0, R1)
	LDPW	8(R9), (R2, R3)
	LDPW	16(R9), (R4, R5)
	LDPW	24(R9), (R6, R7)
	STPW.P	(R0, R1), 8(R10)
	STPW.P	(R2, R3), 8(R10)
	SUB	$7, R8
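	// Each iteration below produces eight words; seven iterations plus
	// the four words stored above yield the 60 words (15 round keys)
	// that AES-256 needs.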
ks256Loop:
		STPW.P	(R4, R5), 8(R10)
		STPW.P	(R6, R7), 8(R10)
		VMOV	R7, V2.S[0]
		WORD	$0x4E030042 // TBL V3.B16, [V2.B16], V2.B16
		AESE	V0.B16, V2.B16
		EORW	R13, R0
		LSLW	$1, R13
		SUBS	$1, R8
		VMOV	V2.S[0], R9
		EORW	R9, R0
		EORW	R0, R1
		EORW	R1, R2
		EORW	R2, R3
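		// AES-256 also applies SubWord (without RotWord or Rcon) to
		// the middle word of each iteration, using the plain invSRows
		// table in V4.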
		VMOV	R3, V2.S[0]
		WORD	$0x4E040042 // TBL V4.B16, [V2.B16], V2.B16
		AESE	V0.B16, V2.B16
		VMOV	V2.S[0], R9
		EORW	R9, R4
		EORW	R4, R5
		EORW	R5, R6
		EORW	R6, R7
		STPW.P	(R0, R1), 8(R10)
		STPW.P	(R2, R3), 8(R10)
	BNE	ks256Loop
	CBZ	R11, ksDone
	SUB	$240, R10
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V18.B16
	AESIMC	V1.B16, V17.B16
	VMOV	V2.B16, V16.B16
	VST1.P	[V16.B16, V17.B16, V18.B16], 48(R11)
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
ksDone:
	RET
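
For reference, the Go-side declarations that pair with this file look
roughly like the sketch below. The file name cipher_asm.go is an assumption
about where crypto/aes keeps them; the signatures themselves come from the
comments above.

	// Sketch of the assumed Go declarations (cipher_asm.go).
	// Defined in asm_arm64.s:

	//go:noescape
	func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)

	//go:noescape
	func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)

	//go:noescape
	func expandKeyAsm(nr int, key *byte, enc, dec *uint32)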
