...

Text file src/math/big/arith_386.s

Documentation: math/big

     1// Copyright 2009 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !math_big_pure_go
     6
     7#include "textflag.h"
     8
     9// This file provides fast assembly versions for the elementary
    10// arithmetic operations on vectors implemented in arith.go.
    11
    12// func addVV(z, x, y []Word) (c Word)
      //
      // addVV computes z[i] = x[i] + y[i] + carry for 0 <= i < len(z) and
      // returns the carry out of the last word (0 or 1) in c. Only len(z) is
      // loaded; x and y are indexed up to len(z)-1 without a length check.
      //
      // Registers: DI = &z[0], SI = &x[0], CX = &y[0], BP = n, BX = i,
      // AX = current word, DX = carry, kept as 0 or -1 between iterations.
    13TEXT ·addVV(SB),NOSPLIT,$0
    14	MOVL z+0(FP), DI	// DI = &z[0]
    15	MOVL x+12(FP), SI	// SI = &x[0]
    16	MOVL y+24(FP), CX	// CX = &y[0]
    17	MOVL z_len+4(FP), BP	// BP = n = len(z)
    18	MOVL $0, BX		// i = 0
    19	MOVL $0, DX		// c = 0
    20	JMP E1			// test i < n first, so n == 0 never enters the loop
    21
    22L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
    23	ADDL DX, DX		// restore CF: DX is 0 or -1, and -1 + -1 carries out
    24	ADCL (CX)(BX*4), AX	// AX = x[i] + y[i] + CF
    25	SBBL DX, DX		// save CF as DX = -CF; the ADDL $1 below overwrites the flags
    26	MOVL AX, (DI)(BX*4)	// z[i] = AX
    27	ADDL $1, BX		// i++
    28
    29E1:	CMPL BX, BP		// i < n
    30	JL L1
    31
    32	NEGL DX			// convert the saved carry from 0/-1 to 0/1
    33	MOVL DX, c+36(FP)	// return c
    34	RET
    35
    36
    37// func subVV(z, x, y []Word) (c Word)
    38// (same as addVV except for SBBL instead of ADCL and label names)
      //
      // subVV computes z[i] = x[i] - y[i] - borrow for 0 <= i < len(z) and
      // returns the borrow out of the last word (0 or 1) in c. Only len(z) is
      // loaded; x and y are indexed up to len(z)-1 without a length check.
      //
      // Registers: DI = &z[0], SI = &x[0], CX = &y[0], BP = n, BX = i,
      // AX = current word, DX = borrow, kept as 0 or -1 between iterations.
    39TEXT ·subVV(SB),NOSPLIT,$0
    40	MOVL z+0(FP), DI	// DI = &z[0]
    41	MOVL x+12(FP), SI	// SI = &x[0]
    42	MOVL y+24(FP), CX	// CX = &y[0]
    43	MOVL z_len+4(FP), BP	// BP = n = len(z)
    44	MOVL $0, BX		// i = 0
    45	MOVL $0, DX		// c = 0
    46	JMP E2			// test i < n first, so n == 0 never enters the loop
    47
    48L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
    49	ADDL DX, DX		// restore CF: DX is 0 or -1, and -1 + -1 carries out
    50	SBBL (CX)(BX*4), AX	// AX = x[i] - y[i] - CF
    51	SBBL DX, DX		// save CF as DX = -CF; the ADDL $1 below overwrites the flags
    52	MOVL AX, (DI)(BX*4)	// z[i] = AX
    53	ADDL $1, BX		// i++
    54
    55E2:	CMPL BX, BP		// i < n
    56	JL L2
    57
    58	NEGL DX			// convert the saved borrow from 0/-1 to 0/1
    59	MOVL DX, c+36(FP)	// return c
    60	RET
    61
    62
    63// func addVW(z, x []Word, y Word) (c Word)
      //
      // addVW adds the single word y to the vector x: z[0] = x[0] + y, and
      // each following z[i] = x[i] + carry from the previous word, for
      // 0 <= i < len(z). It returns the final carry in c; when len(z) == 0
      // the loop is skipped and c is y itself.
      //
      // Registers: DI = &z[0], SI = &x[0], BP = n, BX = i,
      // AX = c (y on entry, then the 0/1 carry of the previous word).
    64TEXT ·addVW(SB),NOSPLIT,$0
    65	MOVL z+0(FP), DI	// DI = &z[0]
    66	MOVL x+12(FP), SI	// SI = &x[0]
    67	MOVL y+24(FP), AX	// c = y
    68	MOVL z_len+4(FP), BP	// BP = n = len(z)
    69	MOVL $0, BX		// i = 0
    70	JMP E3			// test i < n first, so n == 0 never enters the loop
    71
    72L3:	ADDL (SI)(BX*4), AX	// AX = x[i] + c, CF = carry out
    73	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF intact for the SBBL below)
    74	SBBL AX, AX		// save CF
    75	NEGL AX			// c = 0 or 1
    76	ADDL $1, BX		// i++
    77
    78E3:	CMPL BX, BP		// i < n
    79	JL L3
    80
    81	MOVL AX, c+28(FP)	// return c
    82	RET
    83
    84
    85// func subVW(z, x []Word, y Word) (c Word)
      //
      // subVW subtracts the single word y from the vector x: z[0] = x[0] - y,
      // and each following z[i] = x[i] - borrow from the previous word, for
      // 0 <= i < len(z). It returns the final borrow in c; when len(z) == 0
      // the loop is skipped and c is y itself.
      //
      // Registers: DI = &z[0], SI = &x[0], BP = n, BX = i, DX = current word,
      // AX = c (y on entry, then the 0/1 borrow of the previous word).
    86TEXT ·subVW(SB),NOSPLIT,$0
    87	MOVL z+0(FP), DI	// DI = &z[0]
    88	MOVL x+12(FP), SI	// SI = &x[0]
    89	MOVL y+24(FP), AX	// c = y
    90	MOVL z_len+4(FP), BP	// BP = n = len(z)
    91	MOVL $0, BX		// i = 0
    92	JMP E4			// test i < n first, so n == 0 never enters the loop
    93
    94L4:	MOVL (SI)(BX*4), DX	// DX = x[i]
    95	SUBL AX, DX		// DX = x[i] - c, CF = borrow out
    96	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF intact for the SBBL below)
    97	SBBL AX, AX		// save CF
    98	NEGL AX			// c = 0 or 1
    99	ADDL $1, BX		// i++
   100
   101E4:	CMPL BX, BP		// i < n
   102	JL L4
   103
   104	MOVL AX, c+28(FP)	// return c
   105	RET
   106
   107
   108// func shlVU(z, x []Word, s uint) (c Word)
      //
      // shlVU shifts the vector x left by s bits into z, walking from the
      // highest index (n-1) down to 0, where n = len(z). It returns in c the
      // bits shifted out of x[n-1]; for n <= 0 nothing is stored and c is 0.
      // In the comments below, ŝ is the complementary shift count supplied by
      // the double-shift instruction (presumably 32-s).
      // NOTE(review): the shift count is taken from CX unchecked; presumably
      // callers guarantee s is smaller than the word size — confirm.
      //
      // Registers: DI = &z[0], SI = &x[0], CX = s, BX = i,
      // AX = w1 (next lower word), DX = w (word being assembled).
   109TEXT ·shlVU(SB),NOSPLIT,$0
   110	MOVL z_len+4(FP), BX	// i = n (len(z))
   111	SUBL $1, BX		// i--
   112	JL X8b			// i < 0	(n <= 0)
   113
   114	// n > 0
   115	MOVL z+0(FP), DI	// DI = &z[0]
   116	MOVL x+12(FP), SI	// SI = &x[0]
   117	MOVL s+24(FP), CX	// CX = s
   118	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
   119	MOVL $0, DX		// clear DX so the double shift yields only bits from w1
   120	SHLL CX, AX, DX		// w1>>ŝ
   121	MOVL DX, c+28(FP)	// c = w1>>ŝ, the bits shifted out of the top word
   122
   123	CMPL BX, $0
   124	JLE X8a			// i <= 0
   125
   126	// i > 0
   127L8:	MOVL AX, DX		// w = w1
   128	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
   129	SHLL CX, AX, DX		// w<<s | w1>>ŝ
   130	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>ŝ
   131	SUBL $1, BX		// i--
   132	JG L8			// i > 0
   133
   134	// i <= 0
   135X8a:	SHLL CX, AX		// w1<<s
   136	MOVL AX, (DI)		// z[0] = w1<<s
   137	RET
   138
   139X8b:	MOVL $0, c+28(FP)	// empty vector: c = 0
   140	RET
   141
   142
   143// func shrVU(z, x []Word, s uint) (c Word)
      //
      // shrVU shifts the vector x right by s bits into z, walking from index
      // 0 up to n-1, where n = len(z). It returns in c the bits shifted out of
      // x[0] (as x[0]<<ŝ); for n <= 0 nothing is stored and c is 0.
      // In the comments below, ŝ is the complementary shift count supplied by
      // the double-shift instruction (presumably 32-s).
      // NOTE(review): the shift count is taken from CX unchecked; presumably
      // callers guarantee s is smaller than the word size — confirm.
      //
      // Registers: DI = &z[0], SI = &x[0], CX = s, BP = n-1, BX = i,
      // AX = w1 (next higher word), DX = w (word being assembled).
   144TEXT ·shrVU(SB),NOSPLIT,$0
   145	MOVL z_len+4(FP), BP	// BP = n = len(z)
   146	SUBL $1, BP		// n--
   147	JL X9b			// n < 0	(n <= 0)
   148
   149	// n > 0
   150	MOVL z+0(FP), DI	// DI = &z[0]
   151	MOVL x+12(FP), SI	// SI = &x[0]
   152	MOVL s+24(FP), CX	// CX = s
   153	MOVL (SI), AX		// w1 = x[0]
   154	MOVL $0, DX		// clear DX so the double shift yields only bits from w1
   155	SHRL CX, AX, DX		// w1<<ŝ
   156	MOVL DX, c+28(FP)	// c = w1<<ŝ, the bits shifted out of the bottom word
   157
   158	MOVL $0, BX		// i = 0
   159	JMP E9			// test i < n-1 first, so n == 1 skips the loop
   160
   161	// i < n-1
   162L9:	MOVL AX, DX		// w = w1
   163	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
   164	SHRL CX, AX, DX		// w>>s | w1<<ŝ
   165	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<ŝ
   166	ADDL $1, BX		// i++
   167
   168E9:	CMPL BX, BP
   169	JL L9			// i < n-1
   170
   171	// i >= n-1
   172X9a:	SHRL CX, AX		// w1>>s
   173	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
   174	RET
   175
   176X9b:	MOVL $0, c+28(FP)	// empty vector: c = 0
   177	RET
   178
   179
   180// func mulAddVWW(z, x []Word, y, r Word) (c Word)
      //
      // mulAddVWW computes z[k] = low word of (x[k]*y + c) for 0 <= k < len(z),
      // where c starts as r and is replaced after each word by the high word
      // of that sum. The final c is returned; when len(z) == 0 the loop is
      // skipped and c is r.
      //
      // The loop uses a negative index: DI and SI are advanced to &z[n] and
      // &x[n], and BX runs from -n up to 0, so (DI)(BX*4) addresses z[n+i].
      //
      // Registers: BP = y, CX = c, BX = i, DX:AX = product of the current word.
   181TEXT ·mulAddVWW(SB),NOSPLIT,$0
   182	MOVL z+0(FP), DI	// DI = &z[0]
   183	MOVL x+12(FP), SI	// SI = &x[0]
   184	MOVL y+24(FP), BP	// BP = y
   185	MOVL r+28(FP), CX	// c = r
   186	MOVL z_len+4(FP), BX	// BX = n = len(z)
   187	LEAL (DI)(BX*4), DI	// DI = &z[n]
   188	LEAL (SI)(BX*4), SI	// SI = &x[n]
   189	NEGL BX			// i = -n
   190	JMP E5			// test i < 0 first, so n == 0 never enters the loop
   191
   192L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   193	MULL BP			// DX:AX = x[n+i] * y
   194	ADDL CX, AX		// low word += c
   195	ADCL $0, DX		// propagate the carry into the high word
   196	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
   197	MOVL DX, CX		// c = high word
   198	ADDL $1, BX		// i++
   199
   200E5:	CMPL BX, $0		// i < 0
   201	JL L5
   202
   203	MOVL CX, c+32(FP)	// return c
   204	RET
   205
   206
   207// func addMulVVW(z, x []Word, y Word) (c Word)
      //
      // addMulVVW adds x*y into z in place: for 0 <= k < len(z), z[k] becomes
      // the low word of (z[k] + x[k]*y + c), and c becomes the high word of
      // that sum. c starts at 0 and its final value is returned.
      //
      // The loop uses a negative index: DI and SI are advanced to &z[n] and
      // &x[n], and BX runs from -n up to 0, so (DI)(BX*4) addresses z[n+i].
      //
      // Registers: BP = y, CX = c, BX = i, DX:AX = product of the current word.
   208TEXT ·addMulVVW(SB),NOSPLIT,$0
   209	MOVL z+0(FP), DI	// DI = &z[0]
   210	MOVL x+12(FP), SI	// SI = &x[0]
   211	MOVL y+24(FP), BP	// BP = y
   212	MOVL z_len+4(FP), BX	// BX = n = len(z)
   213	LEAL (DI)(BX*4), DI	// DI = &z[n]
   214	LEAL (SI)(BX*4), SI	// SI = &x[n]
   215	NEGL BX			// i = -n
   216	MOVL $0, CX		// c = 0
   217	JMP E6			// test i < 0 first, so n == 0 never enters the loop
   218
   219L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
   220	MULL BP			// DX:AX = x[n+i] * y
   221	ADDL CX, AX		// low word += c
   222	ADCL $0, DX		// propagate the carry into the high word
   223	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
   224	ADCL $0, DX		// propagate the carry of that add into the high word
   225	MOVL DX, CX		// c = high word
   226	ADDL $1, BX		// i++
   227
   228E6:	CMPL BX, $0		// i < 0
   229	JL L6
   230
   231	MOVL CX, c+28(FP)	// return c
   232	RET
   233
   234
   235

View as plain text