// Copyright 2023 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import ( "strconv" . "github.com/mmcloughlin/avo/build" . "github.com/mmcloughlin/avo/operand" . "github.com/mmcloughlin/avo/reg" ) //go:generate go run . -out ../nat_amd64.s -pkg bigmod func main() { Package("crypto/internal/bigmod") ConstraintExpr("!purego") addMulVVW(1024) addMulVVW(1536) addMulVVW(2048) Generate() } func addMulVVW(bits int) { if bits%64 != 0 { panic("bit size unsupported") } Implement("addMulVVW" + strconv.Itoa(bits)) CMPB(Mem{Symbol: Symbol{Name: "·supportADX"}, Base: StaticBase}, Imm(1)) JEQ(LabelRef("adx")) z := Mem{Base: Load(Param("z"), GP64())} x := Mem{Base: Load(Param("x"), GP64())} y := Load(Param("y"), GP64()) carry := GP64() XORQ(carry, carry) // zero out carry for i := 0; i < bits/64; i++ { Comment("Iteration " + strconv.Itoa(i)) hi, lo := RDX, RAX // implicit MULQ inputs and outputs MOVQ(x.Offset(i*8), lo) MULQ(y) ADDQ(z.Offset(i*8), lo) ADCQ(Imm(0), hi) ADDQ(carry, lo) ADCQ(Imm(0), hi) MOVQ(hi, carry) MOVQ(lo, z.Offset(i*8)) } Store(carry, ReturnIndex(0)) RET() Label("adx") // The ADX strategy implements the following function, where c1 and c2 are // the overflow and the carry flag respectively. // // func addMulVVW(z, x []uint, y uint) (carry uint) { // var c1, c2 uint // for i := range z { // hi, lo := bits.Mul(x[i], y) // lo, c1 = bits.Add(lo, z[i], c1) // z[i], c2 = bits.Add(lo, carry, c2) // carry = hi // } // return carry + c1 + c2 // } // // The loop is fully unrolled and the hi / carry registers are alternated // instead of introducing a MOV. z = Mem{Base: Load(Param("z"), GP64())} x = Mem{Base: Load(Param("x"), GP64())} Load(Param("y"), RDX) // implicit source of MULXQ carry = GP64() XORQ(carry, carry) // zero out carry z0 := GP64() XORQ(z0, z0) // unset flags and zero out z0 for i := 0; i < bits/64; i++ { hi, lo := GP64(), GP64() Comment("Iteration " + strconv.Itoa(i)) MULXQ(x.Offset(i*8), lo, hi) ADCXQ(carry, lo) ADOXQ(z.Offset(i*8), lo) MOVQ(lo, z.Offset(i*8)) i++ Comment("Iteration " + strconv.Itoa(i)) MULXQ(x.Offset(i*8), lo, carry) ADCXQ(hi, lo) ADOXQ(z.Offset(i*8), lo) MOVQ(lo, z.Offset(i*8)) } Comment("Add back carry flags and return") ADCXQ(z0, carry) ADOXQ(z0, carry) Store(carry, ReturnIndex(0)) RET() }