...

Text file src/crypto/sha1/sha1block_arm.s

Documentation: crypto/sha1

     1// Copyright 2014 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4//
     5// ARM version of sha1block.go
     6
     7#include "textflag.h"
     8
     9// SHA-1 block routine. See sha1block.go for Go equivalent.
    10//
    11// There are 80 rounds of 4 types:
    12//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
    13//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
    14//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
    15//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
    16//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
    17//
    18// Each round loads or shuffles the data, then computes a per-round
    19// function of b, c, d, and then mixes the result into and rotates the
    20// five registers a, b, c, d, e holding the intermediate results.
    21//
    22// The register rotation is implemented by rotating the arguments to
    23// the round macros instead of by explicit move instructions.
    24
    25// Register definitions (shared by every macro below and by block)
    26#define Rdata	R0	// Pointer to the next unread input byte; LOAD advances it by 4
    27#define Rconst	R1	// Current constant for SHA round; MIX adds it to e
    28#define Ra	R2		// SHA-1 accumulator a
    29#define Rb	R3		// SHA-1 accumulator b
    30#define Rc	R4		// SHA-1 accumulator c
    31#define Rd	R5		// SHA-1 accumulator d
    32#define Re	R6		// SHA-1 accumulator e
    33#define Rt0	R7		// Temporary; holds w[i] at the end of LOAD and SHUFFLE
    34#define Rt1	R8		// Temporary; holds the FUNCn result that MIX adds to e
    35// r9, r10 are forbidden
    36// r11 is OK provided you check that the assembler emits no synthetic instructions that use it
    37#define Rt2	R11		// Temporary
    38#define Rctr	R12	// Loop counter for the 5-round groups in block
    39#define Rw	R14		// Pointer to the next free word of the w buffer; LOAD and SHUFFLE advance it by 4
    40
    41// func block(dig *digest, p []byte)
    42// 0(FP) is *digest
    43// 4(FP) is p.array (struct Slice)
    44// 8(FP) is p.len
    45//12(FP) is p.cap
    46//
    47// Stack frame
    48#define p_end	end-4(SP)		// pointer to the end of data; written once on entry to block, re-read after every chunk
    49#define p_data	data-8(SP)	// current data pointer (never referenced in this file: the live pointer stays in Rdata)
    50#define w_buf	buf-(8+4*80)(SP)	// 80-word temporary buffer: w uint32[80]
    51#define saved	abcde-(8+4*80+4*5)(SP)	// saved sha1 registers a,b,c,d,e - these must be last (never referenced by name: block addresses its save area as (R13) with MOVM.IB; NOTE(review): presumably the same slots - confirm)
    52// Total size: 8 + 4*80 + 4*5 = 348 bytes, +4 for saved LR is 352 (the frame size given on the TEXT line)
    53
    54	// LOAD(Re): w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3], built from four single-byte loads; the last load post-increments Rdata by 4
    55	// e += w[i]; the store to w post-increments Rw by 4. Clobbers Rt0 (left holding w[i]), Rt1, Rt2.
    56#define LOAD(Re) \
    57	MOVBU	2(Rdata), Rt0 ; \
    58	MOVBU	3(Rdata), Rt1 ; \
    59	MOVBU	1(Rdata), Rt2 ; \
    60	ORR	Rt0<<8, Rt1, Rt0	    ; \
    61	MOVBU.P	4(Rdata), Rt1 ; \
    62	ORR	Rt2<<16, Rt0, Rt0	    ; \
    63	ORR	Rt1<<24, Rt0, Rt0	    ; \
    64	MOVW.P	Rt0, 4(Rw)		    ; \
    65	ADD	Rt0, Re, Re
    66
    67	// SHUFFLE(Re): tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], read at negative offsets from Rw (w is the linear 80-word buffer, not a 16-word ring)
    68	// w[i] = tmp<<1 | tmp>>(32-1), coded as a rotate right by 32-1; the store post-increments Rw by 4
    69	// e += w[i]. Clobbers Rt0 (left holding w[i]), Rt1, Rt2.
    70#define SHUFFLE(Re) \
    71	MOVW	(-16*4)(Rw), Rt0 ; \
    72	MOVW	(-14*4)(Rw), Rt1 ; \
    73	MOVW	(-8*4)(Rw), Rt2  ; \
    74	EOR	Rt0, Rt1, Rt0  ; \
    75	MOVW	(-3*4)(Rw), Rt1  ; \
    76	EOR	Rt2, Rt0, Rt0  ; \
    77	EOR	Rt0, Rt1, Rt0  ; \
    78	MOVW	Rt0@>(32-1), Rt0  ; \
    79	MOVW.P	Rt0, 4(Rw)	  ; \
    80	ADD	Rt0, Re, Re
    81
    82	// t1 = (b & c) | ((~b) & d): bits of c where b is set, bits of d where b is clear. Result in Rt1; Rt0 is left holding b & c.
    83#define FUNC1(Ra, Rb, Rc, Rd, Re) \
    84	AND	Rc, Rb, Rt0  ; \
    85	MVN	Rb, Rt1	   ; \
    86	AND	Rt1, Rd, Rt1 ; \
    87	ORR	Rt1, Rt0, Rt1
    88
    89	// t1 = b ^ c ^ d, accumulated directly in Rt1; Rt0 is not touched.
    90#define FUNC2(Ra, Rb, Rc, Rd, Re) \
    91	EOR	Rc, Rb, Rt1 ; \
    92	EOR	Rt1, Rd, Rt1
    93
    94	// t1 = (b & c) | (b & d) | (c & d), evaluated in the shorter equivalent form
    95	// t1 = (b & c) | ((b | c) & d). Result in Rt1; Rt0 is left holding (b | c) & d.
    96#define FUNC3(Ra, Rb, Rc, Rd, Re) \
    97	AND	Rc, Rb, Rt1  ; \
    98	ORR	Rc, Rb, Rt0  ; \
    99	AND	Rt0, Rd, Rt0 ; \
   100	ORR	Rt1, Rt0, Rt1
   101
   102#define FUNC4 FUNC2	// type 4 rounds reuse FUNC2 (t1 = b ^ c ^ d); only the constant loaded into Rconst differs
   103
   104	// a5 := a<<5 | a>>(32-5)   (never stored: applied as the shifted operand Ra@>(32-5) of an ADD, so Ra itself is unchanged)
   105	// b = b<<30 | b>>(32-30)   (coded as a rotate right by 32-30, written back to Rb)
   106	// e = a5 + t1 + e + const   (t1 is the FUNCn result in Rt1; e already includes w[i] from LOAD/SHUFFLE; const is Rconst)
   107#define MIX(Ra, Rb, Rc, Rd, Re) \
   108	ADD	Rt1, Re, Re	 ; \
   109	MOVW	Rb@>(32-30), Rb	 ; \
   110	ADD	Ra@>(32-5), Re, Re ; \
   111	ADD	Rconst, Re, Re
   112// ROUND1 (rounds 0-15): LOAD the next input word into w and add it to e, then apply FUNC1 and MIX.
   113#define ROUND1(Ra, Rb, Rc, Rd, Re) \
   114	LOAD(Re)		; \
   115	FUNC1(Ra, Rb, Rc, Rd, Re)	; \
   116	MIX(Ra, Rb, Rc, Rd, Re)
   117// ROUND1x (rounds 16-19): same FUNC1 and MIX as ROUND1, but w[i] comes from SHUFFLE instead of LOAD.
   118#define ROUND1x(Ra, Rb, Rc, Rd, Re) \
   119	SHUFFLE(Re)	; \
   120	FUNC1(Ra, Rb, Rc, Rd, Re)	; \
   121	MIX(Ra, Rb, Rc, Rd, Re)
   122// ROUND2 (rounds 20-39): SHUFFLE the next w[i] into e, then apply FUNC2 (b ^ c ^ d) and MIX.
   123#define ROUND2(Ra, Rb, Rc, Rd, Re) \
   124	SHUFFLE(Re)	; \
   125	FUNC2(Ra, Rb, Rc, Rd, Re)	; \
   126	MIX(Ra, Rb, Rc, Rd, Re)
   127// ROUND3 (rounds 40-59): SHUFFLE the next w[i] into e, then apply FUNC3 ((b & c) | ((b | c) & d)) and MIX.
   128#define ROUND3(Ra, Rb, Rc, Rd, Re) \
   129	SHUFFLE(Re)	; \
   130	FUNC3(Ra, Rb, Rc, Rd, Re)	; \
   131	MIX(Ra, Rb, Rc, Rd, Re)
   132// ROUND4 (rounds 60-79): SHUFFLE the next w[i] into e, then apply FUNC4 (an alias of FUNC2) and MIX.
   133#define ROUND4(Ra, Rb, Rc, Rd, Re) \
   134	SHUFFLE(Re)	; \
   135	FUNC4(Ra, Rb, Rc, Rd, Re)	; \
   136	MIX(Ra, Rb, Rc, Rd, Re)
   137
   138// block updates the five-word state at *dig with the data in p. Each pass of "loop" consumes 64 bytes (16 LOADs of 4 bytes) and runs all 80 rounds; the length is tested only after a pass, so one chunk is always consumed. NOTE(review): presumably callers never pass an empty p or a length that is not a multiple of 64 - confirm.
   139// func block(dig *digest, p []byte)
   140TEXT	·block(SB), 0, $352-16
   141	MOVW	p+4(FP), Rdata	// pointer to the data
   142	MOVW	p_len+8(FP), Rt0	// number of bytes
   143	ADD	Rdata, Rt0	// Rt0 = data pointer + number of bytes
   144	MOVW	Rt0, p_end	// pointer to end of data
   145
   146	// Load up initial SHA-1 accumulator: five consecutive words starting at *dig
   147	MOVW	dig+0(FP), Rt0
   148	MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re]
   149
   150loop:
   151	// Save registers at SP+4 onwards: a,b,c,d,e as they stand before this chunk, for the final accumulate
   152	MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13)
   153
   154	MOVW	$w_buf, Rw	// every chunk refills w from w[0]
   155	MOVW	$0x5A827999, Rconst	// constant for the type 1 rounds (0-19)
   156	MOVW	$3, Rctr	// 3 passes x 5 rounds = rounds 0-14; the argument rotation returns to (a,b,c,d,e) after each 5 rounds
   157loop1:	ROUND1(Ra, Rb, Rc, Rd, Re)
   158	ROUND1(Re, Ra, Rb, Rc, Rd)
   159	ROUND1(Rd, Re, Ra, Rb, Rc)
   160	ROUND1(Rc, Rd, Re, Ra, Rb)
   161	ROUND1(Rb, Rc, Rd, Re, Ra)
   162	SUB.S	$1, Rctr	// .S updates the flags that BNE tests
   163	BNE	loop1
   164	// Round 15 is the last one that LOADs input; rounds 16-19 keep FUNC1 and the same constant but take w[i] from SHUFFLE
   165	ROUND1(Ra, Rb, Rc, Rd, Re)
   166	ROUND1x(Re, Ra, Rb, Rc, Rd)
   167	ROUND1x(Rd, Re, Ra, Rb, Rc)
   168	ROUND1x(Rc, Rd, Re, Ra, Rb)
   169	ROUND1x(Rb, Rc, Rd, Re, Ra)
   170	// Rounds 20-39 (type 2): 4 passes x 5 rounds
   171	MOVW	$0x6ED9EBA1, Rconst
   172	MOVW	$4, Rctr
   173loop2:	ROUND2(Ra, Rb, Rc, Rd, Re)
   174	ROUND2(Re, Ra, Rb, Rc, Rd)
   175	ROUND2(Rd, Re, Ra, Rb, Rc)
   176	ROUND2(Rc, Rd, Re, Ra, Rb)
   177	ROUND2(Rb, Rc, Rd, Re, Ra)
   178	SUB.S	$1, Rctr
   179	BNE	loop2
   180	// Rounds 40-59 (type 3): 4 passes x 5 rounds
   181	MOVW	$0x8F1BBCDC, Rconst
   182	MOVW	$4, Rctr
   183loop3:	ROUND3(Ra, Rb, Rc, Rd, Re)
   184	ROUND3(Re, Ra, Rb, Rc, Rd)
   185	ROUND3(Rd, Re, Ra, Rb, Rc)
   186	ROUND3(Rc, Rd, Re, Ra, Rb)
   187	ROUND3(Rb, Rc, Rd, Re, Ra)
   188	SUB.S	$1, Rctr
   189	BNE	loop3
   190	// Rounds 60-79 (type 4): 4 passes x 5 rounds
   191	MOVW	$0xCA62C1D6, Rconst
   192	MOVW	$4, Rctr
   193loop4:	ROUND4(Ra, Rb, Rc, Rd, Re)
   194	ROUND4(Re, Ra, Rb, Rc, Rd)
   195	ROUND4(Rd, Re, Ra, Rb, Rc)
   196	ROUND4(Rc, Rd, Re, Ra, Rb)
   197	ROUND4(Rb, Rc, Rd, Re, Ra)
   198	SUB.S	$1, Rctr
   199	BNE	loop4
   200
   201	// Accumulate - restoring registers from SP+4 into scratch registers (Rctr and Rw are free here: both are reloaded before their next use)
   202	MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw]
   203	ADD	Rt0, Ra	// a += a saved at the top of loop
   204	ADD	Rt1, Rb	// b += saved b
   205	ADD	Rt2, Rc	// c += saved c
   206	ADD	Rctr, Rd	// d += saved d
   207	ADD	Rw, Re	// e += saved e
   208
   209	MOVW	p_end, Rt0
   210	CMP	Rt0, Rdata
   211	BLO	loop	// another chunk while Rdata < p_end (unsigned compare)
   212
   213	// Save final SHA-1 accumulator back to the five words at *dig
   214	MOVW	dig+0(FP), Rt0
   215	MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0)
   216
   217	RET

View as plain text