...

Text file src/crypto/sha512/sha512block_ppc64x.s

Documentation: crypto/sha512

     1// Copyright 2016 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5// Based on CRYPTOGAMS code with the following comment:
     6// # ====================================================================
     7// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
     8// # project. The module is, however, dual licensed under OpenSSL and
     9// # CRYPTOGAMS licenses depending on where you obtain it. For further
    10// # details see http://www.openssl.org/~appro/cryptogams/.
    11// # ====================================================================
    12
    13//go:build ppc64 || ppc64le
    14
    15#include "textflag.h"
    16
    17// SHA512 block routine. See sha512block.go for Go equivalent.
    18//
    19// The algorithm is detailed in FIPS 180-4:
    20//
    21//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    22//
    23// Wt = Mt; for 0 <= t <= 15
    24// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    25//
    26// a = H0
    27// b = H1
    28// c = H2
    29// d = H3
    30// e = H4
    31// f = H5
    32// g = H6
    33// h = H7
    34//
    35// for t = 0 to 79 {
    36//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    37//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    38//    h = g
    39//    g = f
    40//    f = e
    41//    e = d + T1
    42//    d = c
    43//    c = b
    44//    b = a
    45//    a = T1 + T2
    46// }
    47//
    48// H0 = a + H0
    49// H1 = b + H1
    50// H2 = c + H2
    51// H3 = d + H3
    52// H4 = e + H4
    53// H5 = f + H5
    54// H6 = g + H6
    55// H7 = h + H7
    56
    57#define CTX	R3	// dig: base address of the hash state that block loads and stores
    58#define INP	R4	// read cursor into p
    59#define END	R5	// INP plus the rounded length; the block loop runs while INP < END
    60#define TBL	R6	// moving base for indexed loads from kcon
    61#define CNT	R8	// never read by any code visible in this file
    62#define LEN	R9	// p_len, rounded down before END is computed
    63#define TEMP	R12	// short-lived scratch (mask setup, CTR load)
    64
    65#define TBL_STRT R7 // Pointer to start of kcon table.
    66
      // R_x010..R_x110 are loaded by block with the byte offsets their names
      // spell out and serve as index registers for the indexed vector loads and
      // stores. R_x000 is R0, which this code never writes and relies on as a
      // zero index.
    67#define R_x000	R0
    68#define R_x010	R10
    69#define R_x020	R25
    70#define R_x030	R26
    71#define R_x040	R14
    72#define R_x050	R15
    73#define R_x060	R16
    74#define R_x070	R17
    75#define R_x080	R18
    76#define R_x090	R19
    77#define R_x0a0	R20
    78#define R_x0b0	R21
    79#define R_x0c0	R22
    80#define R_x0d0	R23
    81#define R_x0e0	R24
    82#define R_x0f0	R28
    83#define R_x100	R29
    84#define R_x110	R27
    85
    86
    87// V0-V7 are A-H
    88// V8-V23 are used for the message schedule
    89#define KI	V24	// most recent vector loaded from kcon
    90#define FUNC	V25	// scratch: Ch(e,f,g), then Maj(a,b,c)
    91#define S0	V26	// BIGSIGMA0(a)
    92#define S1	V27	// BIGSIGMA1(e)
    93#define s0	V28	// SIGMA0 of a schedule word (SHA512ROUND1 only)
    94#define s1	V29	// SIGMA1 of a schedule word (SHA512ROUND1 only)
    95#define LEMASK	V31	// Permutation control register for little endian
    96
    97// VPERM is needed on LE to switch the bytes
      // VPERMLE(va,vb,vc,vt) expands to a VPERM when building for ppc64le and
      // to nothing otherwise, so the round code can invoke it unconditionally
      // on each freshly loaded pair of message words.
    98
    99#ifdef GOARCH_ppc64le
   100#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
   101#else
   102#define VPERMLE(va,vb,vc,vt)
   103#endif
   104
   105// 2 copies of each Kt, to fill both doublewords of a vector register
      // Layout: the 80 SHA-512 round constants at 16-byte stride (0x000-0x4FF),
      // followed by two 16-byte trailer vectors described where they appear.
   106DATA  ·kcon+0x000(SB)/8, $0x428a2f98d728ae22
   107DATA  ·kcon+0x008(SB)/8, $0x428a2f98d728ae22
   108DATA  ·kcon+0x010(SB)/8, $0x7137449123ef65cd
   109DATA  ·kcon+0x018(SB)/8, $0x7137449123ef65cd
   110DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfec4d3b2f
   111DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfec4d3b2f
   112DATA  ·kcon+0x030(SB)/8, $0xe9b5dba58189dbbc
   113DATA  ·kcon+0x038(SB)/8, $0xe9b5dba58189dbbc
   114DATA  ·kcon+0x040(SB)/8, $0x3956c25bf348b538
   115DATA  ·kcon+0x048(SB)/8, $0x3956c25bf348b538
   116DATA  ·kcon+0x050(SB)/8, $0x59f111f1b605d019
   117DATA  ·kcon+0x058(SB)/8, $0x59f111f1b605d019
   118DATA  ·kcon+0x060(SB)/8, $0x923f82a4af194f9b
   119DATA  ·kcon+0x068(SB)/8, $0x923f82a4af194f9b
   120DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5da6d8118
   121DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5da6d8118
   122DATA  ·kcon+0x080(SB)/8, $0xd807aa98a3030242
   123DATA  ·kcon+0x088(SB)/8, $0xd807aa98a3030242
   124DATA  ·kcon+0x090(SB)/8, $0x12835b0145706fbe
   125DATA  ·kcon+0x098(SB)/8, $0x12835b0145706fbe
   126DATA  ·kcon+0x0A0(SB)/8, $0x243185be4ee4b28c
   127DATA  ·kcon+0x0A8(SB)/8, $0x243185be4ee4b28c
   128DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3d5ffb4e2
   129DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3d5ffb4e2
   130DATA  ·kcon+0x0C0(SB)/8, $0x72be5d74f27b896f
   131DATA  ·kcon+0x0C8(SB)/8, $0x72be5d74f27b896f
   132DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe3b1696b1
   133DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe3b1696b1
   134DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a725c71235
   135DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a725c71235
   136DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174cf692694
   137DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174cf692694
   138DATA  ·kcon+0x100(SB)/8, $0xe49b69c19ef14ad2
   139DATA  ·kcon+0x108(SB)/8, $0xe49b69c19ef14ad2
   140DATA  ·kcon+0x110(SB)/8, $0xefbe4786384f25e3
   141DATA  ·kcon+0x118(SB)/8, $0xefbe4786384f25e3
   142DATA  ·kcon+0x120(SB)/8, $0x0fc19dc68b8cd5b5
   143DATA  ·kcon+0x128(SB)/8, $0x0fc19dc68b8cd5b5
   144DATA  ·kcon+0x130(SB)/8, $0x240ca1cc77ac9c65
   145DATA  ·kcon+0x138(SB)/8, $0x240ca1cc77ac9c65
   146DATA  ·kcon+0x140(SB)/8, $0x2de92c6f592b0275
   147DATA  ·kcon+0x148(SB)/8, $0x2de92c6f592b0275
   148DATA  ·kcon+0x150(SB)/8, $0x4a7484aa6ea6e483
   149DATA  ·kcon+0x158(SB)/8, $0x4a7484aa6ea6e483
   150DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dcbd41fbd4
   151DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dcbd41fbd4
   152DATA  ·kcon+0x170(SB)/8, $0x76f988da831153b5
   153DATA  ·kcon+0x178(SB)/8, $0x76f988da831153b5
   154DATA  ·kcon+0x180(SB)/8, $0x983e5152ee66dfab
   155DATA  ·kcon+0x188(SB)/8, $0x983e5152ee66dfab
   156DATA  ·kcon+0x190(SB)/8, $0xa831c66d2db43210
   157DATA  ·kcon+0x198(SB)/8, $0xa831c66d2db43210
   158DATA  ·kcon+0x1A0(SB)/8, $0xb00327c898fb213f
   159DATA  ·kcon+0x1A8(SB)/8, $0xb00327c898fb213f
   160DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7beef0ee4
   161DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7beef0ee4
   162DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf33da88fc2
   163DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf33da88fc2
   164DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147930aa725
   165DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147930aa725
   166DATA  ·kcon+0x1E0(SB)/8, $0x06ca6351e003826f
   167DATA  ·kcon+0x1E8(SB)/8, $0x06ca6351e003826f
   168DATA  ·kcon+0x1F0(SB)/8, $0x142929670a0e6e70
   169DATA  ·kcon+0x1F8(SB)/8, $0x142929670a0e6e70
   170DATA  ·kcon+0x200(SB)/8, $0x27b70a8546d22ffc
   171DATA  ·kcon+0x208(SB)/8, $0x27b70a8546d22ffc
   172DATA  ·kcon+0x210(SB)/8, $0x2e1b21385c26c926
   173DATA  ·kcon+0x218(SB)/8, $0x2e1b21385c26c926
   174DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc5ac42aed
   175DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc5ac42aed
   176DATA  ·kcon+0x230(SB)/8, $0x53380d139d95b3df
   177DATA  ·kcon+0x238(SB)/8, $0x53380d139d95b3df
   178DATA  ·kcon+0x240(SB)/8, $0x650a73548baf63de
   179DATA  ·kcon+0x248(SB)/8, $0x650a73548baf63de
   180DATA  ·kcon+0x250(SB)/8, $0x766a0abb3c77b2a8
   181DATA  ·kcon+0x258(SB)/8, $0x766a0abb3c77b2a8
   182DATA  ·kcon+0x260(SB)/8, $0x81c2c92e47edaee6
   183DATA  ·kcon+0x268(SB)/8, $0x81c2c92e47edaee6
   184DATA  ·kcon+0x270(SB)/8, $0x92722c851482353b
   185DATA  ·kcon+0x278(SB)/8, $0x92722c851482353b
   186DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a14cf10364
   187DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a14cf10364
   188DATA  ·kcon+0x290(SB)/8, $0xa81a664bbc423001
   189DATA  ·kcon+0x298(SB)/8, $0xa81a664bbc423001
   190DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70d0f89791
   191DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70d0f89791
   192DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a30654be30
   193DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a30654be30
   194DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d6ef5218
   195DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d6ef5218
   196DATA  ·kcon+0x2D0(SB)/8, $0xd69906245565a910
   197DATA  ·kcon+0x2D8(SB)/8, $0xd69906245565a910
   198DATA  ·kcon+0x2E0(SB)/8, $0xf40e35855771202a
   199DATA  ·kcon+0x2E8(SB)/8, $0xf40e35855771202a
   200DATA  ·kcon+0x2F0(SB)/8, $0x106aa07032bbd1b8
   201DATA  ·kcon+0x2F8(SB)/8, $0x106aa07032bbd1b8
   202DATA  ·kcon+0x300(SB)/8, $0x19a4c116b8d2d0c8
   203DATA  ·kcon+0x308(SB)/8, $0x19a4c116b8d2d0c8
   204DATA  ·kcon+0x310(SB)/8, $0x1e376c085141ab53
   205DATA  ·kcon+0x318(SB)/8, $0x1e376c085141ab53
   206DATA  ·kcon+0x320(SB)/8, $0x2748774cdf8eeb99
   207DATA  ·kcon+0x328(SB)/8, $0x2748774cdf8eeb99
   208DATA  ·kcon+0x330(SB)/8, $0x34b0bcb5e19b48a8
   209DATA  ·kcon+0x338(SB)/8, $0x34b0bcb5e19b48a8
   210DATA  ·kcon+0x340(SB)/8, $0x391c0cb3c5c95a63
   211DATA  ·kcon+0x348(SB)/8, $0x391c0cb3c5c95a63
   212DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4ae3418acb
   213DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4ae3418acb
   214DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f7763e373
   215DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f7763e373
   216DATA  ·kcon+0x370(SB)/8, $0x682e6ff3d6b2b8a3
   217DATA  ·kcon+0x378(SB)/8, $0x682e6ff3d6b2b8a3
   218DATA  ·kcon+0x380(SB)/8, $0x748f82ee5defb2fc
   219DATA  ·kcon+0x388(SB)/8, $0x748f82ee5defb2fc
   220DATA  ·kcon+0x390(SB)/8, $0x78a5636f43172f60
   221DATA  ·kcon+0x398(SB)/8, $0x78a5636f43172f60
   222DATA  ·kcon+0x3A0(SB)/8, $0x84c87814a1f0ab72
   223DATA  ·kcon+0x3A8(SB)/8, $0x84c87814a1f0ab72
   224DATA  ·kcon+0x3B0(SB)/8, $0x8cc702081a6439ec
   225DATA  ·kcon+0x3B8(SB)/8, $0x8cc702081a6439ec
   226DATA  ·kcon+0x3C0(SB)/8, $0x90befffa23631e28
   227DATA  ·kcon+0x3C8(SB)/8, $0x90befffa23631e28
   228DATA  ·kcon+0x3D0(SB)/8, $0xa4506cebde82bde9
   229DATA  ·kcon+0x3D8(SB)/8, $0xa4506cebde82bde9
   230DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7b2c67915
   231DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7b2c67915
   232DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2e372532b
   233DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2e372532b
   234DATA  ·kcon+0x400(SB)/8, $0xca273eceea26619c
   235DATA  ·kcon+0x408(SB)/8, $0xca273eceea26619c
   236DATA  ·kcon+0x410(SB)/8, $0xd186b8c721c0c207
   237DATA  ·kcon+0x418(SB)/8, $0xd186b8c721c0c207
   238DATA  ·kcon+0x420(SB)/8, $0xeada7dd6cde0eb1e
   239DATA  ·kcon+0x428(SB)/8, $0xeada7dd6cde0eb1e
   240DATA  ·kcon+0x430(SB)/8, $0xf57d4f7fee6ed178
   241DATA  ·kcon+0x438(SB)/8, $0xf57d4f7fee6ed178
   242DATA  ·kcon+0x440(SB)/8, $0x06f067aa72176fba
   243DATA  ·kcon+0x448(SB)/8, $0x06f067aa72176fba
   244DATA  ·kcon+0x450(SB)/8, $0x0a637dc5a2c898a6
   245DATA  ·kcon+0x458(SB)/8, $0x0a637dc5a2c898a6
   246DATA  ·kcon+0x460(SB)/8, $0x113f9804bef90dae
   247DATA  ·kcon+0x468(SB)/8, $0x113f9804bef90dae
   248DATA  ·kcon+0x470(SB)/8, $0x1b710b35131c471b
   249DATA  ·kcon+0x478(SB)/8, $0x1b710b35131c471b
   250DATA  ·kcon+0x480(SB)/8, $0x28db77f523047d84
   251DATA  ·kcon+0x488(SB)/8, $0x28db77f523047d84
   252DATA  ·kcon+0x490(SB)/8, $0x32caab7b40c72493
   253DATA  ·kcon+0x498(SB)/8, $0x32caab7b40c72493
   254DATA  ·kcon+0x4A0(SB)/8, $0x3c9ebe0a15c9bebc
   255DATA  ·kcon+0x4A8(SB)/8, $0x3c9ebe0a15c9bebc
   256DATA  ·kcon+0x4B0(SB)/8, $0x431d67c49c100d4c
   257DATA  ·kcon+0x4B8(SB)/8, $0x431d67c49c100d4c
   258DATA  ·kcon+0x4C0(SB)/8, $0x4cc5d4becb3e42b6
   259DATA  ·kcon+0x4C8(SB)/8, $0x4cc5d4becb3e42b6
   260DATA  ·kcon+0x4D0(SB)/8, $0x597f299cfc657e2a
   261DATA  ·kcon+0x4D8(SB)/8, $0x597f299cfc657e2a
   262DATA  ·kcon+0x4E0(SB)/8, $0x5fcb6fab3ad6faec
   263DATA  ·kcon+0x4E8(SB)/8, $0x5fcb6fab3ad6faec
   264DATA  ·kcon+0x4F0(SB)/8, $0x6c44198c4a475817
   265DATA  ·kcon+0x4F8(SB)/8, $0x6c44198c4a475817
      // All-zero entry after the last round constant. Each round macro adds the
      // current table vector to the register that becomes h in the next round,
      // so the final round of a block still performs one such add; zero keeps
      // that add harmless.
   266DATA  ·kcon+0x500(SB)/8, $0x0000000000000000
   267DATA  ·kcon+0x508(SB)/8, $0x0000000000000000
      // Permute control. It is the last vector the round code loads into KI,
      // and block uses it with VPERM to repack the state before storing it.
   268DATA  ·kcon+0x510(SB)/8, $0x1011121314151617
   269DATA  ·kcon+0x518(SB)/8, $0x0001020304050607
      // 1312 = 0x520 bytes: 80 constants at 16 bytes each plus the two trailer vectors.
   270GLOBL ·kcon(SB), RODATA, $1312
   271// SHA512ROUND0 is one compression round for a message word xi that is used
      // as loaded, without extending the message schedule.
      //
      // a-h name the vector registers currently playing those roles. The macro
      // moves no registers; successive invocations pass the same eight registers
      // rotated by one position instead. On entry h must already include this
      // round's constant. The macro computes:
      //   h += xi + Ch(e,f,g) + BIGSIGMA1(e)   (h now holds T1)
      //   d += h                               (d becomes the next e)
      //   h += BIGSIGMA0(a) + Maj(a,b,c)       (h becomes the next a)
      //   g += KI                              (g is the next round's h)
      // and reloads KI from (TBL)(idx) for the round after that.
      // Ch is formed with VSEL on e; Maj with VSEL on a^b.
      // Clobbers FUNC, S0, S1 and KI.
   272#define SHA512ROUND0(a, b, c, d, e, f, g, h, xi, idx) \
   273	VSEL		g, f, e, FUNC; \
   274	VSHASIGMAD	$15, e, $1, S1; \
   275	VADDUDM		xi, h, h; \
   276	VSHASIGMAD	$0, a, $1, S0; \
   277	VADDUDM		FUNC, h, h; \
   278	VXOR		b, a, FUNC; \
   279	VADDUDM		S1, h, h; \
   280	VSEL		b, c, FUNC, FUNC; \
   281	VADDUDM		KI, g, g; \
   282	VADDUDM		h, d, d; \
   283	VADDUDM		FUNC, S0, S0; \
   284	LVX		(TBL)(idx), KI; \
   285	VADDUDM		S0, h, h
   286// SHA512ROUND1 performs the same compression round as SHA512ROUND0 on xi
      // and, interleaved with it, extends the message schedule by one word.
      //
      // The schedule lives in a 16-register ring. xj is the ring slot following
      // xi; xj_1, xj_9 and xj_14 are the slots 1, 9 and 14 positions after xj.
      // xj is updated in place:
      //   xj += xj_9 + SIGMA0(xj_1) + SIGMA1(xj_14)
      // which is Wt = Wt-16 + Wt-7 + SIGMA0(Wt-15) + SIGMA1(Wt-2) with xj
      // holding Wt-16 on entry.
      // Clobbers FUNC, S0, S1, s0, s1 and KI.
   287#define SHA512ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx) \
   288	VSHASIGMAD	$0, xj_1, $0, s0; \
   289	VSEL		g, f, e, FUNC; \
   290	VSHASIGMAD	$15, e, $1, S1; \
   291	VADDUDM		xi, h, h; \
   292	VSHASIGMAD	$0, a, $1, S0; \
   293	VSHASIGMAD	$15, xj_14, $0, s1; \
   294	VADDUDM		FUNC, h, h; \
   295	VXOR		b, a, FUNC; \
   296	VADDUDM		xj_9, xj, xj; \
   297	VADDUDM		S1, h, h; \
   298	VSEL		b, c, FUNC, FUNC; \
   299	VADDUDM		KI, g, g; \
   300	VADDUDM		h, d, d; \
   301	VADDUDM		FUNC, S0, S0; \
   302	VADDUDM		s0, xj, xj; \
   303	LVX		(TBL)(idx), KI; \
   304	VADDUDM		S0, h, h; \
   305	VADDUDM		s1, xj, xj
   306
   307// func block(dig *digest, p []byte)
      //
      // Runs the SHA-512 compression function over every complete 128-byte
      // block of p and adds the result into the 64 bytes of state that start
      // at dig. Returns without touching the state when p holds no complete
      // block.
      //
      // Register use: V0-V7 carry the working variables a-h, V8-V23 form a
      // 16-entry ring of message schedule words, and VS24-VS31 hold a copy of
      // the state taken at the start of each block for the closing H += a..h.
   308TEXT ·block(SB),0,$0-32
   309	MOVD	dig+0(FP), CTX
   310	MOVD	p_base+8(FP), INP
   311	MOVD	p_len+16(FP), LEN
   312
      	// Each pass of loop consumes 0x80 bytes, so clear the low 7 bits of the
      	// length. Rounding to a smaller multiple would let a trailing partial
      	// block through and the loop would read past the end of p.
   313	SRD	$7, LEN
   314	SLD	$7, LEN
   315
   316	ADD	INP, LEN, END
   317
   318	CMP	INP, END
   319	BEQ	end	// no complete block to hash
   320
   321	MOVD	$·kcon(SB), TBL_STRT
   322
      	// Materialize the byte offsets used as index registers by the indexed
      	// vector loads and stores below.
   324	MOVWZ	$0x010, R_x010
   325	MOVWZ	$0x020, R_x020
   326	MOVWZ	$0x030, R_x030
   327	MOVD	$0x040, R_x040
   328	MOVD	$0x050, R_x050
   329	MOVD	$0x060, R_x060
   330	MOVD	$0x070, R_x070
   331	MOVD	$0x080, R_x080
   332	MOVD	$0x090, R_x090
   333	MOVD	$0x0a0, R_x0a0
   334	MOVD	$0x0b0, R_x0b0
   335	MOVD	$0x0c0, R_x0c0
   336	MOVD	$0x0d0, R_x0d0
   337	MOVD	$0x0e0, R_x0e0
   338	MOVD	$0x0f0, R_x0f0
   339	MOVD	$0x100, R_x100
   340	MOVD	$0x110, R_x110
   341
   342
   343#ifdef GOARCH_ppc64le
   344	// Generate the mask used with VPERM for LE
      	// LVSL at address 8 yields the bytes 0x08..0x17; XOR with 0x0F turns
      	// them into 07..00, 1F..18, a control that reverses the byte order of
      	// each doubleword when both VPERM sources are the same register.
   345	MOVWZ	$8, TEMP
   346	LVSL	(TEMP)(R0), LEMASK
   347	VSPLTISB	$0x0F, KI
   348	VXOR	KI, LEMASK, LEMASK
   349#endif
   350
      	// Load the state, two 64-bit words per register.
   351	LXVD2X	(CTX)(R_x000), VS32	// v0 = vs32
   352	LXVD2X	(CTX)(R_x010), VS34	// v2 = vs34
   353	LXVD2X	(CTX)(R_x020), VS36	// v4 = vs36
   354
   355	// unpack the input values into vector registers
      	// Each odd register receives its even neighbour rotated by 8 bytes, so
      	// every state word ends up with a register of its own.
   356	VSLDOI	$8, V0, V0, V1
   357	LXVD2X	(CTX)(R_x030), VS38	// v6 = vs38
   358	VSLDOI	$8, V2, V2, V3
   359	VSLDOI	$8, V4, V4, V5
   360	VSLDOI	$8, V6, V6, V7
   361
      // One pass per 128-byte block.
   362loop:
   363	MOVD	TBL_STRT, TBL	// restart at the first round constant
   364	LVX	(TBL)(R_x000), KI
   365
   366	LXVD2X	(INP)(R0), VS40	// load v8 (=vs40) in advance
   367	ADD	$16, INP	// the remaining loads below index from this advanced INP
   368
   369	// Copy V0-V7 to VS24-VS31
   370
   371	XXLOR	V0, V0, VS24
   372	XXLOR	V1, V1, VS25
   373	XXLOR	V2, V2, VS26
   374	XXLOR	V3, V3, VS27
   375	XXLOR	V4, V4, VS28
   376	XXLOR	V5, V5, VS29
   377	XXLOR	V6, V6, VS30
   378	XXLOR	V7, V7, VS31
   379
   380	VADDUDM	KI, V7, V7	// h+K[i]
   381	LVX	(TBL)(R_x010), KI
   382
      	// Rounds 0-15. Message words arrive two per 16-byte load: VPERMLE fixes
      	// the byte order on little endian and VSLDOI gives the second word of
      	// each pair its own register. The last of these rounds uses
      	// SHA512ROUND1 so that schedule expansion starts with V8.
   383	VPERMLE(V8,V8,LEMASK,V8)
   384	SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8, R_x020)
   385	LXVD2X	(INP)(R_x000), VS42	// load v10 (=vs42) in advance
   386	VSLDOI	$8, V8, V8, V9
   387	SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9, R_x030)
   388	VPERMLE(V10,V10,LEMASK,V10)
   389	SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10, R_x040)
   390	LXVD2X	(INP)(R_x010), VS44	// load v12 (=vs44) in advance
   391	VSLDOI	$8, V10, V10, V11
   392	SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11, R_x050)
   393	VPERMLE(V12,V12,LEMASK,V12)
   394	SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12, R_x060)
   395	LXVD2X	(INP)(R_x020), VS46	// load v14 (=vs46) in advance
   396	VSLDOI	$8, V12, V12, V13
   397	SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13, R_x070)
   398	VPERMLE(V14,V14,LEMASK,V14)
   399	SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14, R_x080)
   400	LXVD2X	(INP)(R_x030), VS48	// load v16 (=vs48) in advance
   401	VSLDOI	$8, V14, V14, V15
   402	SHA512ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15, R_x090)
   403	VPERMLE(V16,V16,LEMASK,V16)
   404	SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16, R_x0a0)
   405	LXVD2X	(INP)(R_x040), VS50	// load v18 (=vs50) in advance
   406	VSLDOI	$8, V16, V16, V17
   407	SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17, R_x0b0)
   408	VPERMLE(V18,V18,LEMASK,V18)
   409	SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18, R_x0c0)
   410	LXVD2X	(INP)(R_x050), VS52	// load v20 (=vs52) in advance
   411	VSLDOI	$8, V18, V18, V19
   412	SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19, R_x0d0)
   413	VPERMLE(V20,V20,LEMASK,V20)
   414	SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20, R_x0e0)
   415	LXVD2X	(INP)(R_x060), VS54	// load v22 (=vs54) in advance
   416	VSLDOI	$8, V20, V20, V21
   417	SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21, R_x0f0)
   418	VPERMLE(V22,V22,LEMASK,V22)
   419	SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22, R_x100)
   420	VSLDOI	$8, V22, V22, V23
   421	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x110)
   422
      	// Rounds 16-79: four trips through a 16-round body.
   423	MOVWZ	$4, TEMP
   424	MOVWZ	TEMP, CTR
   425	ADD	$0x120, TBL	// step past the table entries fetched so far
   426	ADD	$0x70, INP	// with the earlier +16, INP has advanced one full block
   427
   428L16_xx:
   429	SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23, R_x000)
   430	SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8, R_x010)
   431	SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9, R_x020)
   432	SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10, R_x030)
   433	SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11, R_x040)
   434	SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12, R_x050)
   435	SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13, R_x060)
   436	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14, R_x070)
   437	SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15, R_x080)
   438	SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16, R_x090)
   439	SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17, R_x0a0)
   440	SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18, R_x0b0)
   441	SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19, R_x0c0)
   442	SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20, R_x0d0)
   443	SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21, R_x0e0)
   444	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x0f0)
   445	ADD	$0x100, TBL	// 16 table entries consumed per trip
   446
   447	BDNZ	L16_xx
   448
      	// H += a..h: bring back the state saved at the top of loop and add it.
      	// V10-V17 are free to reuse because the schedule is finished.
   449	XXLOR	VS24, VS24, V10
   450	XXLOR	VS25, VS25, V11
   451	XXLOR	VS26, VS26, V12
   452	XXLOR	VS27, VS27, V13
   453	XXLOR	VS28, VS28, V14
   454	XXLOR	VS29, VS29, V15
   455	XXLOR	VS30, VS30, V16
   456	XXLOR	VS31, VS31, V17
   457	VADDUDM	V10, V0, V0
   458	VADDUDM	V11, V1, V1
   459	VADDUDM	V12, V2, V2
   460	VADDUDM	V13, V3, V3
   461	VADDUDM	V14, V4, V4
   462	VADDUDM	V15, V5, V5
   463	VADDUDM	V16, V6, V6
   464	VADDUDM	V17, V7, V7
   465
   466	CMPU	INP, END
   467	BLT	loop
   468
      	// KI now holds the last vector fetched from kcon, which is the permute
      	// control stored after the round constants. Use it to pack each pair
      	// of state words back into a single register for the 16-byte stores.
   469#ifdef GOARCH_ppc64le
   470	VPERM	V0, V1, KI, V0
   471	VPERM	V2, V3, KI, V2
   472	VPERM	V4, V5, KI, V4
   473	VPERM	V6, V7, KI, V6
   474#else
   475	VPERM	V1, V0, KI, V0
   476	VPERM	V3, V2, KI, V2
   477	VPERM	V5, V4, KI, V4
   478	VPERM	V7, V6, KI, V6
   479#endif
   480	STXVD2X	VS32, (CTX+R_x000)	// v0 = vs32
   481	STXVD2X	VS34, (CTX+R_x010)	// v2 = vs34
   482	STXVD2X	VS36, (CTX+R_x020)	// v4 = vs36
   483	STXVD2X	VS38, (CTX+R_x030)	// v6 = vs38
   484
   485end:
   486	RET
   487

View as plain text