...

Text file src/crypto/aes/asm_ppc64x.s

Documentation: crypto/aes

     1// Copyright 2016 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build ppc64 || ppc64le
     6
     7// Based on CRYPTOGAMS code with the following comment:
     8// # ====================================================================
     9// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
    10// # project. The module is, however, dual licensed under OpenSSL and
    11// # CRYPTOGAMS licenses depending on where you obtain it. For further
    12// # details see http://www.openssl.org/~appro/cryptogams/.
    13// # ====================================================================
    14
    15// Original code can be found at the link below:
    16// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl
    17
    18// Some function names were changed to be consistent with Go function
    19// names. For instance, function aes_p8_set_{en,de}crypt_key become
    20// set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts
     21// and a new section was created (doEncryptKeyAsm). This was necessary to
    22// avoid arguments overwriting when setDecryptKeyAsm calls setEncryptKeyAsm.
    23// There were other modifications as well but kept the same functionality.
    24
    25#include "textflag.h"
    26
    27// For expandKeyAsm
    28#define INP     R3
    29#define BITS    R4
    30#define OUTENC  R5 // Pointer to next expanded encrypt key
    31#define PTR     R6
    32#define CNT     R7
    33#define ROUNDS  R8
    34#define OUTDEC  R9  // Pointer to next expanded decrypt key
    35#define TEMP    R19
    36#define ZERO    V0
    37#define IN0     V1
    38#define IN1     V2
    39#define KEY     V3
    40#define RCON    V4
    41#define MASK    V5
    42#define TMP     V6
    43#define STAGE   V7
    44#define OUTPERM V8
    45#define OUTMASK V9
    46#define OUTHEAD V10
    47#define OUTTAIL V11
    48
    49// For P9 instruction emulation
    50#define ESPERM  V21  // Endian swapping permute into BE
    51#define TMP2    V22  // Temporary for P8_STXVB16X/P8_STXVB16X
    52
    53// For {en,de}cryptBlockAsm
    54#define BLK_INP    R3
    55#define BLK_OUT    R4
    56#define BLK_KEY    R5
    57#define BLK_ROUNDS R6
    58#define BLK_IDX    R7
    59
    60DATA ·rcon+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap
    61DATA ·rcon+0x08(SB)/8, $0x0706050403020100
    62DATA ·rcon+0x10(SB)/8, $0x0100000001000000 // RCON
    63DATA ·rcon+0x18(SB)/8, $0x0100000001000000 // RCON
    64DATA ·rcon+0x20(SB)/8, $0x1b0000001b000000
    65DATA ·rcon+0x28(SB)/8, $0x1b0000001b000000
    66DATA ·rcon+0x30(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
    67DATA ·rcon+0x38(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
    68DATA ·rcon+0x40(SB)/8, $0x0000000000000000
    69DATA ·rcon+0x48(SB)/8, $0x0000000000000000
    70GLOBL ·rcon(SB), RODATA, $80
    71
    72#ifdef GOARCH_ppc64le
    73#  ifdef GOPPC64_power9
    74#define P8_LXVB16X(RA,RB,VT)  LXVB16X	(RA+RB), VT
    75#define P8_STXVB16X(VS,RA,RB) STXVB16X	VS, (RA+RB)
    76#define XXBRD_ON_LE(VA,VT)    XXBRD	VA, VT
    77#  else
    78// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned
    79// doublewords and byte-swapping each doubleword to emulate BE load/stores.
    80#define NEEDS_ESPERM
    81#define P8_LXVB16X(RA,RB,VT) \
    82	LXVD2X	(RA+RB), VT \
    83	VPERM	VT, VT, ESPERM, VT
    84
    85#define P8_STXVB16X(VS,RA,RB) \
    86	VPERM	VS, VS, ESPERM, TMP2 \
    87	STXVD2X	TMP2, (RA+RB)
    88
    89#define XXBRD_ON_LE(VA,VT) \
    90	VPERM	VA, VA, ESPERM, VT
    91
    92#  endif // defined(GOPPC64_power9)
    93#else
    94#define P8_LXVB16X(RA,RB,VT)  LXVD2X	(RA+RB), VT
    95#define P8_STXVB16X(VS,RA,RB) STXVD2X	VS, (RA+RB)
    96#define XXBRD_ON_LE(VA, VT)
    97#endif // defined(GOARCH_ppc64le)
    98
    99// func setEncryptKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
   100TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
   101	// Load the arguments inside the registers
   102	MOVD	nr+0(FP), ROUNDS
   103	MOVD	key+8(FP), INP
   104	MOVD	enc+16(FP), OUTENC
   105	MOVD	dec+24(FP), OUTDEC
   106
   107#ifdef NEEDS_ESPERM
   108	MOVD	$·rcon(SB), PTR // PTR points to rcon addr
   109	LVX	(PTR), ESPERM
   110	ADD	$0x10, PTR
   111#else
   112	MOVD	$·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector)
   113#endif
   114
   115	// Get key from memory and write aligned into VR
   116	P8_LXVB16X(INP, R0, IN0)
   117	ADD	$0x10, INP, INP
   118	MOVD	$0x20, TEMP
   119
   120	CMPW	ROUNDS, $12
   121	LVX	(PTR)(R0), RCON    // lvx   4,0,6      Load first 16 bytes into RCON
   122	LVX	(PTR)(TEMP), MASK
   123	ADD	$0x10, PTR, PTR    // addi  6,6,0x10   PTR to next 16 bytes of RCON
   124	MOVD	$8, CNT            // li    7,8        CNT = 8
   125	VXOR	ZERO, ZERO, ZERO   // vxor  0,0,0      Zero to be zero :)
   126	MOVD	CNT, CTR           // mtctr 7          Set the counter to 8 (rounds)
   127
   128	// The expanded decrypt key is the expanded encrypt key stored in reverse order.
   129	// Move OUTDEC to the last key location, and store in descending order.
   130	ADD	$160, OUTDEC, OUTDEC
   131	BLT	loop128
   132	ADD	$32, OUTDEC, OUTDEC
   133	BEQ	l192
   134	ADD	$32, OUTDEC, OUTDEC
   135	JMP	l256
   136
   137loop128:
   138	// Key schedule (Round 1 to 8)
   139	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5         Rotate-n-splat
   140	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   141	STXVD2X	IN0, (R0+OUTENC)
   142	STXVD2X	IN0, (R0+OUTDEC)
   143	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   144	ADD	$16, OUTENC, OUTENC
   145	ADD	$-16, OUTDEC, OUTDEC
   146
   147	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   148	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   149	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   150	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   151	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   152	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
   153	VXOR	IN0, KEY, IN0       // vxor 1,1,3
   154	BC	0x10, 0, loop128    // bdnz .Loop128
   155
   156	LVX	(PTR)(R0), RCON // lvx 4,0,6     Last two round keys
   157
   158	// Key schedule (Round 9)
   159	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-spat
   160	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   161	STXVD2X	IN0, (R0+OUTENC)
   162	STXVD2X	IN0, (R0+OUTDEC)
   163	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   164	ADD	$16, OUTENC, OUTENC
   165	ADD	$-16, OUTDEC, OUTDEC
   166
   167	// Key schedule (Round 10)
   168	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   169	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   170	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   171	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   172	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   173	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
   174	VXOR	IN0, KEY, IN0       // vxor 1,1,3
   175
   176	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
   177	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   178	STXVD2X	IN0, (R0+OUTENC)
   179	STXVD2X	IN0, (R0+OUTDEC)
   180	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   181	ADD	$16, OUTENC, OUTENC
   182	ADD	$-16, OUTDEC, OUTDEC
   183
   184	// Key schedule (Round 11)
   185	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   186	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   187	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   188	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   189	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   190	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
   191	STXVD2X	IN0, (R0+OUTENC)
   192	STXVD2X	IN0, (R0+OUTDEC)
   193
   194	RET
   195
   196l192:
   197	LXSDX	(INP+R0), IN1                    // Load next 8 bytes into upper half of VSR.
   198	XXBRD_ON_LE(IN1, IN1)                    // and convert to BE ordering on LE hosts.
   199	MOVD	$4, CNT                          // li 7,4
   200	STXVD2X	IN0, (R0+OUTENC)
   201	STXVD2X	IN0, (R0+OUTDEC)
   202	ADD	$16, OUTENC, OUTENC
   203	ADD	$-16, OUTDEC, OUTDEC
   204	VSPLTISB	$8, KEY                  // vspltisb 3,8
   205	MOVD	CNT, CTR                         // mtctr 7
   206	VSUBUBM	MASK, KEY, MASK                  // vsububm 5,5,3
   207
   208loop192:
   209	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
   210	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
   211	VCIPHERLAST	KEY, RCON, KEY      // vcipherlast 3,3,4
   212
   213	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   214	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   215	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   216	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   217	VXOR	IN0, TMP, IN0       // vxor 1,1,6
   218
   219	VSLDOI	$8, ZERO, IN1, STAGE  // vsldoi 7,0,2,8
   220	VSPLTW	$3, IN0, TMP          // vspltw 6,1,3
   221	VXOR	TMP, IN1, TMP         // vxor 6,6,2
   222	VSLDOI	$12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
   223	VADDUWM	RCON, RCON, RCON      // vadduwm 4,4,4
   224	VXOR	IN1, TMP, IN1         // vxor 2,2,6
   225	VXOR	IN0, KEY, IN0         // vxor 1,1,3
   226	VXOR	IN1, KEY, IN1         // vxor 2,2,3
   227	VSLDOI	$8, STAGE, IN0, STAGE // vsldoi 7,7,1,8
   228
   229	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
   230	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   231	STXVD2X	STAGE, (R0+OUTENC)
   232	STXVD2X	STAGE, (R0+OUTDEC)
   233	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   234	ADD	$16, OUTENC, OUTENC
   235	ADD	$-16, OUTDEC, OUTDEC
   236
   237	VSLDOI	$8, IN0, IN1, STAGE              // vsldoi 7,1,2,8
   238	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   239	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   240	STXVD2X	STAGE, (R0+OUTENC)
   241	STXVD2X	STAGE, (R0+OUTDEC)
   242	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   243	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   244	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   245	ADD	$16, OUTENC, OUTENC
   246	ADD	$-16, OUTDEC, OUTDEC
   247
   248	VSPLTW	$3, IN0, TMP                     // vspltw 6,1,3
   249	VXOR	TMP, IN1, TMP                    // vxor 6,6,2
   250	VSLDOI	$12, ZERO, IN1, IN1              // vsldoi 2,0,2,12
   251	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
   252	VXOR	IN1, TMP, IN1                    // vxor 2,2,6
   253	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
   254	VXOR	IN1, KEY, IN1                    // vxor 2,2,3
   255	STXVD2X	IN0, (R0+OUTENC)
   256	STXVD2X	IN0, (R0+OUTDEC)
   257	ADD	$16, OUTENC, OUTENC
   258	ADD	$-16, OUTDEC, OUTDEC
   259	BC	0x10, 0, loop192                 // bdnz .Loop192
   260
   261	RET
   262
   263l256:
   264	P8_LXVB16X(INP, R0, IN1)
   265	MOVD	$7, CNT                          // li 7,7
   266	STXVD2X	IN0, (R0+OUTENC)
   267	STXVD2X	IN0, (R0+OUTDEC)
   268	ADD	$16, OUTENC, OUTENC
   269	ADD	$-16, OUTDEC, OUTDEC
   270	MOVD	CNT, CTR                         // mtctr 7
   271
   272loop256:
   273	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
   274	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
   275	STXVD2X	IN1, (R0+OUTENC)
   276	STXVD2X	IN1, (R0+OUTDEC)
   277	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
   278	ADD	$16, OUTENC, OUTENC
   279	ADD	$-16, OUTDEC, OUTDEC
   280
   281	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   282	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   283	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   284	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
   285	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
   286	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
   287	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
   288	STXVD2X	IN0, (R0+OUTENC)
   289	STXVD2X	IN0, (R0+OUTDEC)
   290	ADD	$16, OUTENC, OUTENC
   291	ADD	$-16, OUTDEC, OUTDEC
   292	BC	0x12, 0, done                    // bdz .Ldone
   293
   294	VSPLTW	$3, IN0, KEY        // vspltw 3,1,3
   295	VSLDOI	$12, ZERO, IN1, TMP // vsldoi 6,0,2,12
   296	VSBOX	KEY, KEY            // vsbox 3,3
   297
   298	VXOR	IN1, TMP, IN1       // vxor 2,2,6
   299	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   300	VXOR	IN1, TMP, IN1       // vxor 2,2,6
   301	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
   302	VXOR	IN1, TMP, IN1       // vxor 2,2,6
   303
   304	VXOR	IN1, KEY, IN1 // vxor 2,2,3
   305	JMP	loop256       // b .Loop256
   306
   307done:
   308	RET
   309
   310// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
   311TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
   312	MOVD	nr+0(FP), R6   // Round count/Key size
   313	MOVD	xk+8(FP), R5   // Key pointer
   314	MOVD	dst+16(FP), R3 // Dest pointer
   315	MOVD	src+24(FP), R4 // Src pointer
   316#ifdef NEEDS_ESPERM
   317	MOVD	$·rcon(SB), R7
   318	LVX	(R7), ESPERM   // Permute value for P8_ macros.
   319#endif
   320
   321	// Set CR{1,2,3}EQ to hold the key size information.
   322	CMPU	R6, $10, CR1
   323	CMPU	R6, $12, CR2
   324	CMPU	R6, $14, CR3
   325
   326	MOVD	$16, R6
   327	MOVD	$32, R7
   328	MOVD	$48, R8
   329	MOVD	$64, R9
   330	MOVD	$80, R10
   331	MOVD	$96, R11
   332	MOVD	$112, R12
   333
   334	// Load text in BE order
   335	P8_LXVB16X(R4, R0, V0)
   336
   337	// V1, V2 will hold keys, V0 is a temp.
   338	// At completion, V2 will hold the ciphertext.
   339	// Load xk[0:3] and xor with text
   340	LXVD2X	(R0+R5), V1
   341	VXOR	V0, V1, V0
   342
   343	// Load xk[4:11] and cipher
   344	LXVD2X	(R6+R5), V1
   345	LXVD2X	(R7+R5), V2
   346	VCIPHER	V0, V1, V0
   347	VCIPHER	V0, V2, V0
   348
   349	// Load xk[12:19] and cipher
   350	LXVD2X	(R8+R5), V1
   351	LXVD2X	(R9+R5), V2
   352	VCIPHER	V0, V1, V0
   353	VCIPHER	V0, V2, V0
   354
   355	// Load xk[20:27] and cipher
   356	LXVD2X	(R10+R5), V1
   357	LXVD2X	(R11+R5), V2
   358	VCIPHER	V0, V1, V0
   359	VCIPHER	V0, V2, V0
   360
   361	// Increment xk pointer to reuse constant offsets in R6-R12.
   362	ADD	$112, R5
   363
   364	// Load xk[28:35] and cipher
   365	LXVD2X	(R0+R5), V1
   366	LXVD2X	(R6+R5), V2
   367	VCIPHER	V0, V1, V0
   368	VCIPHER	V0, V2, V0
   369
   370	// Load xk[36:43] and cipher
   371	LXVD2X	(R7+R5), V1
   372	LXVD2X	(R8+R5), V2
   373	BEQ	CR1, Ldec_tail // Key size 10?
   374	VCIPHER	V0, V1, V0
   375	VCIPHER	V0, V2, V0
   376
   377	// Load xk[44:51] and cipher
   378	LXVD2X	(R9+R5), V1
   379	LXVD2X	(R10+R5), V2
   380	BEQ	CR2, Ldec_tail // Key size 12?
   381	VCIPHER	V0, V1, V0
   382	VCIPHER	V0, V2, V0
   383
   384	// Load xk[52:59] and cipher
   385	LXVD2X	(R11+R5), V1
   386	LXVD2X	(R12+R5), V2
   387	BNE	CR3, Linvalid_key_len // Not key size 14?
   388	// Fallthrough to final cipher
   389
   390Ldec_tail:
   391	// Cipher last two keys such that key information is
   392	// cleared from V1 and V2.
   393	VCIPHER		V0, V1, V1
   394	VCIPHERLAST	V1, V2, V2
   395
   396	// Store the result in BE order.
   397	P8_STXVB16X(V2, R3, R0)
   398	RET
   399
   400Linvalid_key_len:
   401	// Segfault, this should never happen. Only 3 keys sizes are created/used.
   402	MOVD	R0, 0(R0)
   403	RET
   404
   405// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
   406TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
   407	MOVD	nr+0(FP), R6   // Round count/Key size
   408	MOVD	xk+8(FP), R5   // Key pointer
   409	MOVD	dst+16(FP), R3 // Dest pointer
   410	MOVD	src+24(FP), R4 // Src pointer
   411#ifdef NEEDS_ESPERM
   412	MOVD	$·rcon(SB), R7
   413	LVX	(R7), ESPERM   // Permute value for P8_ macros.
   414#endif
   415
   416	// Set CR{1,2,3}EQ to hold the key size information.
   417	CMPU	R6, $10, CR1
   418	CMPU	R6, $12, CR2
   419	CMPU	R6, $14, CR3
   420
   421	MOVD	$16, R6
   422	MOVD	$32, R7
   423	MOVD	$48, R8
   424	MOVD	$64, R9
   425	MOVD	$80, R10
   426	MOVD	$96, R11
   427	MOVD	$112, R12
   428
   429	// Load text in BE order
   430	P8_LXVB16X(R4, R0, V0)
   431
   432	// V1, V2 will hold keys, V0 is a temp.
   433	// At completion, V2 will hold the text.
   434	// Load xk[0:3] and xor with ciphertext
   435	LXVD2X	(R0+R5), V1
   436	VXOR	V0, V1, V0
   437
   438	// Load xk[4:11] and cipher
   439	LXVD2X	(R6+R5), V1
   440	LXVD2X	(R7+R5), V2
   441	VNCIPHER	V0, V1, V0
   442	VNCIPHER	V0, V2, V0
   443
   444	// Load xk[12:19] and cipher
   445	LXVD2X	(R8+R5), V1
   446	LXVD2X	(R9+R5), V2
   447	VNCIPHER	V0, V1, V0
   448	VNCIPHER	V0, V2, V0
   449
   450	// Load xk[20:27] and cipher
   451	LXVD2X	(R10+R5), V1
   452	LXVD2X	(R11+R5), V2
   453	VNCIPHER	V0, V1, V0
   454	VNCIPHER	V0, V2, V0
   455
   456	// Increment xk pointer to reuse constant offsets in R6-R12.
   457	ADD	$112, R5
   458
   459	// Load xk[28:35] and cipher
   460	LXVD2X	(R0+R5), V1
   461	LXVD2X	(R6+R5), V2
   462	VNCIPHER	V0, V1, V0
   463	VNCIPHER	V0, V2, V0
   464
   465	// Load xk[36:43] and cipher
   466	LXVD2X	(R7+R5), V1
   467	LXVD2X	(R8+R5), V2
   468	BEQ	CR1, Ldec_tail // Key size 10?
   469	VNCIPHER	V0, V1, V0
   470	VNCIPHER	V0, V2, V0
   471
   472	// Load xk[44:51] and cipher
   473	LXVD2X	(R9+R5), V1
   474	LXVD2X	(R10+R5), V2
   475	BEQ	CR2, Ldec_tail // Key size 12?
   476	VNCIPHER	V0, V1, V0
   477	VNCIPHER	V0, V2, V0
   478
   479	// Load xk[52:59] and cipher
   480	LXVD2X	(R11+R5), V1
   481	LXVD2X	(R12+R5), V2
   482	BNE	CR3, Linvalid_key_len // Not key size 14?
   483	// Fallthrough to final cipher
   484
   485Ldec_tail:
   486	// Cipher last two keys such that key information is
   487	// cleared from V1 and V2.
   488	VNCIPHER	V0, V1, V1
   489	VNCIPHERLAST	V1, V2, V2
   490
   491	// Store the result in BE order.
   492	P8_STXVB16X(V2, R3, R0)
   493	RET
   494
   495Linvalid_key_len:
   496	// Segfault, this should never happen. Only 3 keys sizes are created/used.
   497	MOVD	R0, 0(R0)
   498	RET
   499
   500// Remove defines from above so they can be defined here
   501#undef INP
   502#undef OUTENC
   503#undef ROUNDS
   504#undef KEY
   505#undef TMP
   506
   507#define INP R3
   508#define OUTP R4
   509#define LEN R5
   510#define KEYP R6
   511#define ROUNDS R7
   512#define IVP R8
   513#define ENC R9
   514
   515#define INOUT V2
   516#define TMP V3
   517#define IVEC V4
   518
   519// Load the crypt key into VSRs.
   520//
   521// The expanded key is stored and loaded using
   522// STXVD2X/LXVD2X. The in-memory byte ordering
   523// depends on the endianness of the machine. The
   524// expanded keys are generated by expandKeyAsm above.
   525//
   526// Rkeyp holds the key pointer. It is clobbered. Once
   527// the expanded keys are loaded, it is not needed.
   528//
   529// R12,R14-R21 are scratch registers.
   530// For keyp of 10, V6, V11-V20 hold the expanded key.
   531// For keyp of 12, V6, V9-V20 hold the expanded key.
   532// For keyp of 14, V6, V7-V20 hold the expanded key.
   533#define LOAD_KEY(Rkeyp) \
   534	MOVD	$16, R12 \
   535	MOVD	$32, R14 \
   536	MOVD	$48, R15 \
   537	MOVD	$64, R16 \
   538	MOVD	$80, R17 \
   539	MOVD	$96, R18 \
   540	MOVD	$112, R19 \
   541	MOVD	$128, R20 \
   542	MOVD	$144, R21 \
   543	LXVD2X	(R0+Rkeyp), V6 \
   544	ADD	$16, Rkeyp \
   545	BEQ	CR1, L_start10 \
   546	BEQ	CR2, L_start12 \
   547	LXVD2X	(R0+Rkeyp), V7 \
   548	LXVD2X	(R12+Rkeyp), V8 \
   549	ADD	$32, Rkeyp \
   550	L_start12: \
   551	LXVD2X	(R0+Rkeyp), V9 \
   552	LXVD2X	(R12+Rkeyp), V10 \
   553	ADD	$32, Rkeyp \
   554	L_start10: \
   555	LXVD2X	(R0+Rkeyp), V11 \
   556	LXVD2X	(R12+Rkeyp), V12 \
   557	LXVD2X	(R14+Rkeyp), V13 \
   558	LXVD2X	(R15+Rkeyp), V14 \
   559	LXVD2X	(R16+Rkeyp), V15 \
   560	LXVD2X	(R17+Rkeyp), V16 \
   561	LXVD2X	(R18+Rkeyp), V17 \
   562	LXVD2X	(R19+Rkeyp), V18 \
   563	LXVD2X	(R20+Rkeyp), V19 \
   564	LXVD2X	(R21+Rkeyp), V20
   565
   566// Perform aes cipher operation for keysize 10/12/14 using the keys
   567// loaded by LOAD_KEY, and key size information held in CR1EQ/CR2EQ.
   568//
   569// Vxor is ideally V6 (Key[0-3]), but for slightly improved encrypting
   570// performance V6 and IVEC can be swapped (xor is both associative and
   571// commutative) during encryption:
   572//
   573//	VXOR INOUT, IVEC, INOUT
   574//	VXOR INOUT, V6, INOUT
   575//
   576//	into
   577//
   578//	VXOR INOUT, V6, INOUT
   579//	VXOR INOUT, IVEC, INOUT
   580//
   581#define CIPHER_BLOCK(Vin, Vxor, Vout, vcipher, vciphel, label10, label12) \
   582	VXOR	Vin, Vxor, Vout \
   583	BEQ	CR1, label10 \
   584	BEQ	CR2, label12 \
   585	vcipher	Vout, V7, Vout \
   586	vcipher	Vout, V8, Vout \
   587	label12: \
   588	vcipher	Vout, V9, Vout \
   589	vcipher	Vout, V10, Vout \
   590	label10: \
   591	vcipher	Vout, V11, Vout \
   592	vcipher	Vout, V12, Vout \
   593	vcipher	Vout, V13, Vout \
   594	vcipher	Vout, V14, Vout \
   595	vcipher	Vout, V15, Vout \
   596	vcipher	Vout, V16, Vout \
   597	vcipher	Vout, V17, Vout \
   598	vcipher	Vout, V18, Vout \
   599	vcipher	Vout, V19, Vout \
   600	vciphel	Vout, V20, Vout \
   601
   602#define CLEAR_KEYS() \
   603	VXOR	V6, V6, V6 \
   604	VXOR	V7, V7, V7 \
   605	VXOR	V8, V8, V8 \
   606	VXOR	V9, V9, V9 \
   607	VXOR	V10, V10, V10 \
   608	VXOR	V11, V11, V11 \
   609	VXOR	V12, V12, V12 \
   610	VXOR	V13, V13, V13 \
   611	VXOR	V14, V14, V14 \
   612	VXOR	V15, V15, V15 \
   613	VXOR	V16, V16, V16 \
   614	VXOR	V17, V17, V17 \
   615	VXOR	V18, V18, V18 \
   616	VXOR	V19, V19, V19 \
   617	VXOR	V20, V20, V20
   618
   619//func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int)
   620TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
   621	MOVD	src+0(FP), INP
   622	MOVD	dst+8(FP), OUTP
   623	MOVD	length+16(FP), LEN
   624	MOVD	key+24(FP), KEYP
   625	MOVD	iv+32(FP), IVP
   626	MOVD	enc+40(FP), ENC
   627	MOVD	nr+48(FP), ROUNDS
   628
   629#ifdef NEEDS_ESPERM
   630	MOVD	$·rcon(SB), R11
   631	LVX	(R11), ESPERM   // Permute value for P8_ macros.
   632#endif
   633
   634	// Assume len > 0 && len % blockSize == 0.
   635	CMPW	ENC, $0
   636	P8_LXVB16X(IVP, R0, IVEC)
   637	CMPU	ROUNDS, $10, CR1
   638	CMPU	ROUNDS, $12, CR2 // Only sizes 10/12/14 are supported.
   639
   640	// Setup key in VSRs, and set loop count in CTR.
   641	LOAD_KEY(KEYP)
   642	SRD	$4, LEN
   643	MOVD	LEN, CTR
   644
   645	BEQ	Lcbc_dec
   646
   647	PCALIGN $16
   648Lcbc_enc:
   649	P8_LXVB16X(INP, R0, INOUT)
   650	ADD	$16, INP
   651	VXOR	INOUT, V6, INOUT
   652	CIPHER_BLOCK(INOUT, IVEC, INOUT, VCIPHER, VCIPHERLAST, Lcbc_enc10, Lcbc_enc12)
   653	VOR	INOUT, INOUT, IVEC // ciphertext (INOUT) is IVEC for next block.
   654	P8_STXVB16X(INOUT, OUTP, R0)
   655	ADD	$16, OUTP
   656	BDNZ	Lcbc_enc
   657
   658	P8_STXVB16X(INOUT, IVP, R0)
   659	CLEAR_KEYS()
   660	RET
   661
   662	PCALIGN $16
   663Lcbc_dec:
   664	P8_LXVB16X(INP, R0, TMP)
   665	ADD	$16, INP
   666	CIPHER_BLOCK(TMP, V6, INOUT, VNCIPHER, VNCIPHERLAST, Lcbc_dec10, Lcbc_dec12)
   667	VXOR	INOUT, IVEC, INOUT
   668	VOR	TMP, TMP, IVEC // TMP is IVEC for next block.
   669	P8_STXVB16X(INOUT, OUTP, R0)
   670	ADD	$16, OUTP
   671	BDNZ	Lcbc_dec
   672
   673	P8_STXVB16X(IVEC, IVP, R0)
   674	CLEAR_KEYS()
   675	RET

View as plain text