...

Text file src/crypto/internal/bigmod/nat_arm64.s

Documentation: crypto/internal/bigmod

     1// Copyright 2013 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8
     9// func addMulVVW1024(z, x *uint, y uint) (c uint)
    10TEXT ·addMulVVW1024(SB), $0-32
    11	MOVD	$16, R0
    12	JMP		addMulVVWx(SB)
    13
    14// func addMulVVW1536(z, x *uint, y uint) (c uint)
    15TEXT ·addMulVVW1536(SB), $0-32
    16	MOVD	$24, R0
    17	JMP		addMulVVWx(SB)
    18
    19// func addMulVVW2048(z, x *uint, y uint) (c uint)
    20TEXT ·addMulVVW2048(SB), $0-32
    21	MOVD	$32, R0
    22	JMP		addMulVVWx(SB)
    23
    24TEXT addMulVVWx(SB), NOFRAME|NOSPLIT, $0
    25	MOVD	z+0(FP), R1
    26	MOVD	x+8(FP), R2
    27	MOVD	y+16(FP), R3
    28	MOVD	$0, R4
    29
    30// The main loop of this code operates on a block of 4 words every iteration
    31// performing [R4:R12:R11:R10:R9] = R4 + R3 * [R8:R7:R6:R5] + [R12:R11:R10:R9]
    32// where R4 is carried from the previous iteration, R8:R7:R6:R5 hold the next
    33// 4 words of x, R3 is y and R12:R11:R10:R9 are part of the result z.
    34loop:
    35	CBZ	R0, done
    36
    37	LDP.P	16(R2), (R5, R6)
    38	LDP.P	16(R2), (R7, R8)
    39
    40	LDP	(R1), (R9, R10)
    41	ADDS	R4, R9
    42	MUL	R6, R3, R14
    43	ADCS	R14, R10
    44	MUL	R7, R3, R15
    45	LDP	16(R1), (R11, R12)
    46	ADCS	R15, R11
    47	MUL	R8, R3, R16
    48	ADCS	R16, R12
    49	UMULH	R8, R3, R20
    50	ADC	$0, R20
    51
    52	MUL	R5, R3, R13
    53	ADDS	R13, R9
    54	UMULH	R5, R3, R17
    55	ADCS	R17, R10
    56	UMULH	R6, R3, R21
    57	STP.P	(R9, R10), 16(R1)
    58	ADCS	R21, R11
    59	UMULH	R7, R3, R19
    60	ADCS	R19, R12
    61	STP.P	(R11, R12), 16(R1)
    62	ADC	$0, R20, R4
    63
    64	SUB	$4, R0
    65	B	loop
    66
    67done:
    68	MOVD	R4, c+24(FP)
    69	RET

View as plain text