...

Text file src/crypto/md5/md5block_arm.s

Documentation: crypto/md5

     1// Copyright 2013 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4//
     5// ARM version of md5block.go
     6
     7#include "textflag.h"
     8
     9// Register definitions
    10#define Rtable	R0	// Pointer to MD5 constants table
    11#define Rdata	R1	// Pointer to data to hash
    12#define Ra	R2	// MD5 accumulator
    13#define Rb	R3	// MD5 accumulator
    14#define Rc	R4	// MD5 accumulator
    15#define Rd	R5	// MD5 accumulator
    16#define Rc0	R6	// MD5 constant
    17#define Rc1	R7	// MD5 constant
    18#define Rc2	R8	// MD5 constant
    19// r9, r10 are forbidden
    20// r11 is OK provided you check the assembler that no synthetic instructions use it
    21#define Rc3	R11	// MD5 constant
    22#define Rt0	R12	// temporary
    23#define Rt1	R14	// temporary
    24
    25// func block(dig *digest, p []byte)
    26// 0(FP) is *digest
    27// 4(FP) is p.array (struct Slice)
    28// 8(FP) is p.len
    29//12(FP) is p.cap
    30//
    31// Stack frame
    32#define p_end	end-4(SP)	// pointer to the end of data
    33#define p_data	data-8(SP)	// current data pointer
    34#define buf	buffer-(8+4*16)(SP)	//16 words temporary buffer
    35		// 3 words at 4..12(R13) for called routine parameters
    36
    37TEXT	·block(SB), NOSPLIT, $84-16
    38	MOVW	p+4(FP), Rdata	// pointer to the data
    39	MOVW	p_len+8(FP), Rt0	// number of bytes
    40	ADD	Rdata, Rt0
    41	MOVW	Rt0, p_end	// pointer to end of data
    42
    43loop:
    44	MOVW	Rdata, p_data	// Save Rdata
    45	AND.S	$3, Rdata, Rt0	// TST $3, Rdata not working see issue 5921
    46	BEQ	aligned			// aligned detected - skip copy
    47
    48	// Copy the unaligned source data into the aligned temporary buffer
    49	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
    50	MOVW	$buf, Rtable	// to
    51	MOVW	$64, Rc0		// n
    52	MOVM.IB	[Rtable,Rdata,Rc0], (R13)
    53	BL	runtime·memmove(SB)
    54
    55	// Point to the local aligned copy of the data
    56	MOVW	$buf, Rdata
    57
    58aligned:
    59	// Point to the table of constants
    60	// A PC relative add would be cheaper than this
    61	MOVW	$·table(SB), Rtable
    62
    63	// Load up initial MD5 accumulator
    64	MOVW	dig+0(FP), Rc0
    65	MOVM.IA (Rc0), [Ra,Rb,Rc,Rd]
    66
    67// a += (((c^d)&b)^d) + X[index] + const
    68// a = a<<shift | a>>(32-shift) + b
    69#define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
    70	EOR	Rc, Rd, Rt0		; \
    71	AND	Rb, Rt0			; \
    72	EOR	Rd, Rt0			; \
    73	MOVW	(index<<2)(Rdata), Rt1	; \
    74	ADD	Rt1, Rt0			; \
    75	ADD	Rconst, Rt0			; \
    76	ADD	Rt0, Ra			; \
    77	ADD	Ra@>(32-shift), Rb, Ra	;
    78
    79	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    80	ROUND1(Ra, Rb, Rc, Rd,  0,	7, Rc0)
    81	ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
    82	ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
    83	ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)
    84
    85	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    86	ROUND1(Ra, Rb, Rc, Rd,  4,	7, Rc0)
    87	ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
    88	ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
    89	ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)
    90
    91	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    92	ROUND1(Ra, Rb, Rc, Rd,  8,	7, Rc0)
    93	ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
    94	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
    95	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)
    96
    97	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    98	ROUND1(Ra, Rb, Rc, Rd, 12,	7, Rc0)
    99	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
   100	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
   101	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)
   102
   103// a += (((b^c)&d)^c) + X[index] + const
   104// a = a<<shift | a>>(32-shift) + b
   105#define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   106	EOR	Rb, Rc, Rt0		; \
   107	AND	Rd, Rt0			; \
   108	EOR	Rc, Rt0			; \
   109	MOVW	(index<<2)(Rdata), Rt1	; \
   110	ADD	Rt1, Rt0			; \
   111	ADD	Rconst, Rt0			; \
   112	ADD	Rt0, Ra			; \
   113	ADD	Ra@>(32-shift), Rb, Ra	;
   114
   115	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   116	ROUND2(Ra, Rb, Rc, Rd,  1,	5, Rc0)
   117	ROUND2(Rd, Ra, Rb, Rc,  6,	9, Rc1)
   118	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
   119	ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)
   120
   121	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   122	ROUND2(Ra, Rb, Rc, Rd,  5,	5, Rc0)
   123	ROUND2(Rd, Ra, Rb, Rc, 10,	9, Rc1)
   124	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
   125	ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)
   126
   127	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   128	ROUND2(Ra, Rb, Rc, Rd,  9,	5, Rc0)
   129	ROUND2(Rd, Ra, Rb, Rc, 14,	9, Rc1)
   130	ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
   131	ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)
   132
   133	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   134	ROUND2(Ra, Rb, Rc, Rd, 13,	5, Rc0)
   135	ROUND2(Rd, Ra, Rb, Rc,  2,	9, Rc1)
   136	ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
   137	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)
   138
   139// a += (b^c^d) + X[index] + const
   140// a = a<<shift | a>>(32-shift) + b
   141#define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   142	EOR	Rb, Rc, Rt0		; \
   143	EOR	Rd, Rt0			; \
   144	MOVW	(index<<2)(Rdata), Rt1	; \
   145	ADD	Rt1, Rt0			; \
   146	ADD	Rconst, Rt0			; \
   147	ADD	Rt0, Ra			; \
   148	ADD	Ra@>(32-shift), Rb, Ra	;
   149
   150	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   151	ROUND3(Ra, Rb, Rc, Rd,  5,	4, Rc0)
   152	ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
   153	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
   154	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)
   155
   156	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   157	ROUND3(Ra, Rb, Rc, Rd,  1,	4, Rc0)
   158	ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
   159	ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
   160	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)
   161
   162	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   163	ROUND3(Ra, Rb, Rc, Rd, 13,	4, Rc0)
   164	ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
   165	ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
   166	ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)
   167
   168	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   169	ROUND3(Ra, Rb, Rc, Rd,  9,	4, Rc0)
   170	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
   171	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
   172	ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)
   173
   174// a += (c^(b|^d)) + X[index] + const
   175// a = a<<shift | a>>(32-shift) + b
   176#define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   177	MVN	Rd, Rt0			; \
   178	ORR	Rb, Rt0			; \
   179	EOR	Rc, Rt0			; \
   180	MOVW	(index<<2)(Rdata), Rt1	; \
   181	ADD	Rt1, Rt0			; \
   182	ADD	Rconst, Rt0			; \
   183	ADD	Rt0, Ra			; \
   184	ADD	Ra@>(32-shift), Rb, Ra	;
   185
   186	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   187	ROUND4(Ra, Rb, Rc, Rd,  0,	6, Rc0)
   188	ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
   189	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
   190	ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)
   191
   192	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   193	ROUND4(Ra, Rb, Rc, Rd, 12,	6, Rc0)
   194	ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
   195	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
   196	ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)
   197
   198	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   199	ROUND4(Ra, Rb, Rc, Rd,  8,	6, Rc0)
   200	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
   201	ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
   202	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)
   203
   204	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   205	ROUND4(Ra, Rb, Rc, Rd,  4,	6, Rc0)
   206	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
   207	ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
   208	ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)
   209
   210	MOVW	dig+0(FP), Rt0
   211	MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3]
   212
   213	ADD	Rc0, Ra
   214	ADD	Rc1, Rb
   215	ADD	Rc2, Rc
   216	ADD	Rc3, Rd
   217
   218	MOVM.IA [Ra,Rb,Rc,Rd], (Rt0)
   219
   220	MOVW	p_data, Rdata
   221	MOVW	p_end, Rt0
   222	ADD	$64, Rdata
   223	CMP	Rt0, Rdata
   224	BLO	loop
   225
   226	RET
   227
   228// MD5 constants table
   229
   230	// Round 1
   231	DATA	·table+0x00(SB)/4, $0xd76aa478
   232	DATA	·table+0x04(SB)/4, $0xe8c7b756
   233	DATA	·table+0x08(SB)/4, $0x242070db
   234	DATA	·table+0x0c(SB)/4, $0xc1bdceee
   235	DATA	·table+0x10(SB)/4, $0xf57c0faf
   236	DATA	·table+0x14(SB)/4, $0x4787c62a
   237	DATA	·table+0x18(SB)/4, $0xa8304613
   238	DATA	·table+0x1c(SB)/4, $0xfd469501
   239	DATA	·table+0x20(SB)/4, $0x698098d8
   240	DATA	·table+0x24(SB)/4, $0x8b44f7af
   241	DATA	·table+0x28(SB)/4, $0xffff5bb1
   242	DATA	·table+0x2c(SB)/4, $0x895cd7be
   243	DATA	·table+0x30(SB)/4, $0x6b901122
   244	DATA	·table+0x34(SB)/4, $0xfd987193
   245	DATA	·table+0x38(SB)/4, $0xa679438e
   246	DATA	·table+0x3c(SB)/4, $0x49b40821
   247	// Round 2
   248	DATA	·table+0x40(SB)/4, $0xf61e2562
   249	DATA	·table+0x44(SB)/4, $0xc040b340
   250	DATA	·table+0x48(SB)/4, $0x265e5a51
   251	DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
   252	DATA	·table+0x50(SB)/4, $0xd62f105d
   253	DATA	·table+0x54(SB)/4, $0x02441453
   254	DATA	·table+0x58(SB)/4, $0xd8a1e681
   255	DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
   256	DATA	·table+0x60(SB)/4, $0x21e1cde6
   257	DATA	·table+0x64(SB)/4, $0xc33707d6
   258	DATA	·table+0x68(SB)/4, $0xf4d50d87
   259	DATA	·table+0x6c(SB)/4, $0x455a14ed
   260	DATA	·table+0x70(SB)/4, $0xa9e3e905
   261	DATA	·table+0x74(SB)/4, $0xfcefa3f8
   262	DATA	·table+0x78(SB)/4, $0x676f02d9
   263	DATA	·table+0x7c(SB)/4, $0x8d2a4c8a
   264	// Round 3
   265	DATA	·table+0x80(SB)/4, $0xfffa3942
   266	DATA	·table+0x84(SB)/4, $0x8771f681
   267	DATA	·table+0x88(SB)/4, $0x6d9d6122
   268	DATA	·table+0x8c(SB)/4, $0xfde5380c
   269	DATA	·table+0x90(SB)/4, $0xa4beea44
   270	DATA	·table+0x94(SB)/4, $0x4bdecfa9
   271	DATA	·table+0x98(SB)/4, $0xf6bb4b60
   272	DATA	·table+0x9c(SB)/4, $0xbebfbc70
   273	DATA	·table+0xa0(SB)/4, $0x289b7ec6
   274	DATA	·table+0xa4(SB)/4, $0xeaa127fa
   275	DATA	·table+0xa8(SB)/4, $0xd4ef3085
   276	DATA	·table+0xac(SB)/4, $0x04881d05
   277	DATA	·table+0xb0(SB)/4, $0xd9d4d039
   278	DATA	·table+0xb4(SB)/4, $0xe6db99e5
   279	DATA	·table+0xb8(SB)/4, $0x1fa27cf8
   280	DATA	·table+0xbc(SB)/4, $0xc4ac5665
   281	// Round 4
   282	DATA	·table+0xc0(SB)/4, $0xf4292244
   283	DATA	·table+0xc4(SB)/4, $0x432aff97
   284	DATA	·table+0xc8(SB)/4, $0xab9423a7
   285	DATA	·table+0xcc(SB)/4, $0xfc93a039
   286	DATA	·table+0xd0(SB)/4, $0x655b59c3
   287	DATA	·table+0xd4(SB)/4, $0x8f0ccc92
   288	DATA	·table+0xd8(SB)/4, $0xffeff47d
   289	DATA	·table+0xdc(SB)/4, $0x85845dd1
   290	DATA	·table+0xe0(SB)/4, $0x6fa87e4f
   291	DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
   292	DATA	·table+0xe8(SB)/4, $0xa3014314
   293	DATA	·table+0xec(SB)/4, $0x4e0811a1
   294	DATA	·table+0xf0(SB)/4, $0xf7537e82
   295	DATA	·table+0xf4(SB)/4, $0xbd3af235
   296	DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
   297	DATA	·table+0xfc(SB)/4, $0xeb86d391
   298	// Global definition
   299	GLOBL	·table(SB),8,$256

View as plain text