...

Text file src/internal/bytealg/equal_amd64.s

Documentation: internal/bytealg

     1// Copyright 2018 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "asm_amd64.h"
     7#include "textflag.h"
     8
     9// memequal(a, b unsafe.Pointer, size uintptr) bool
      //
      // Reports whether the size bytes starting at a equal the size bytes
      // starting at b. If the two pointers are the same address the answer is
      // 1 without reading memory. Otherwise the arguments are moved into the
      // registers memeqbody expects and control jumps (not calls) there, so
      // memeqbody's RET returns straight to this function's caller with the
      // result in AX.
    10TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
    11	// AX = a    (want in SI)
    12	// BX = b    (want in DI)
    13	// CX = size (want in BX)
    14	CMPQ	AX, BX	// identical pointers => contents are trivially equal
    15	JNE	neq
    16	MOVQ	$1, AX	// return 1
    17	RET
    18neq:
    19	MOVQ	AX, SI
    20	MOVQ	BX, DI	// b must be copied out of BX before the next line overwrites BX
    21	MOVQ	CX, BX
    22	JMP	memeqbody<>(SB)	// tail jump: memeqbody produces the result in AX and returns for us
    23
    24// memequal_varlen(a, b unsafe.Pointer) bool
      //
      // Reports whether the bytes at a equal the bytes at b, where the length
      // is not an argument but is read from offset 8 of the closure addressed
      // by DX. Identical pointers yield 1 without reading either buffer (and
      // without reading the length). Otherwise the operands are placed in the
      // registers memeqbody expects and control jumps there; memeqbody's RET
      // returns to this function's caller with the result in AX.
    25TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
    26	// AX = a       (want in SI)
    27	// BX = b       (want in DI)
    28	// 8(DX) = size (want in BX)
    29	CMPQ	AX, BX	// identical pointers => contents are trivially equal
    30	JNE	neq
    31	MOVQ	$1, AX	// return 1
    32	RET
    33neq:
    34	MOVQ	AX, SI
    35	MOVQ	BX, DI	// b must be copied out of BX before the next line overwrites BX
    36	MOVQ	8(DX), BX    // compiler stores size at offset 8 in the closure
    37	JMP	memeqbody<>(SB)	// tail jump: memeqbody produces the result in AX and returns for us
    38
    39// Input:
    40//   a in SI
    41//   b in DI
    42//   count in BX
    43// Output:
    44//   result in AX
      //
      // memeqbody compares count bytes at a with count bytes at b and yields
      // 1 if they are all equal, 0 otherwise. The strategy depends on count:
      //   count < 8    small:    one 8-byte load per side, unwanted bytes shifted out
      //   count < 64   bigloop:  8 bytes per iteration, then one overlapping
      //                          8-byte load that ends at the end of the buffers
      //   count >= 64  hugeloop or hugeloop_avx2: 64 bytes per iteration,
      //                          then bigloop for whatever is left
      // Throughout the loops SI+BX and DI+BX stay equal to the end of a and b.
      // Overwrites SI, DI, BX, CX, DX, the flags, and X0-X7 (SSE path) or
      // Y0-Y6 (AVX2 path). On the paths that finish with SETEQ only the low
      // byte of AX is written.
    45TEXT memeqbody<>(SB),NOSPLIT,$0-0
    46	CMPQ	BX, $8
    47	JB	small	// 0-7 bytes: an 8-byte load would run past the data, handle specially
    48	CMPQ	BX, $64
    49	JB	bigloop	// 8-63 bytes: go straight to the 8-byte loop
      	// When hasAVX2 is defined the run-time check and the SSE loop below are
      	// compiled out and control falls directly into hugeloop_avx2.
      	// NOTE(review): hasAVX2 is presumably provided by asm_amd64.h for build
      	// targets that guarantee AVX2 - confirm there.
    50#ifndef hasAVX2
    51	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1	// run-time AVX2 check
    52	JE	hugeloop_avx2
    53
    54	// 64 bytes at a time using xmm registers
    55	PCALIGN $16
    56hugeloop:
    57	CMPQ	BX, $64
    58	JB	bigloop	// fewer than 64 bytes left (possibly 0): finish in the 8-byte loop
    59	MOVOU	(SI), X0
    60	MOVOU	(DI), X1
    61	MOVOU	16(SI), X2
    62	MOVOU	16(DI), X3
    63	MOVOU	32(SI), X4
    64	MOVOU	32(DI), X5
    65	MOVOU	48(SI), X6
    66	MOVOU	48(DI), X7
    67	PCMPEQB	X1, X0	// per byte lane: 0xff where a and b match, 0x00 where they differ
    68	PCMPEQB	X3, X2
    69	PCMPEQB	X5, X4
    70	PCMPEQB	X7, X6
    71	PAND	X2, X0	// fold the four 16-byte results: a lane stays 0xff only if it matched in every chunk
    72	PAND	X6, X4
    73	PAND	X4, X0
    74	PMOVMSKB X0, DX	// DX = one bit per byte lane of the folded result
    75	ADDQ	$64, SI
    76	ADDQ	$64, DI
    77	SUBQ	$64, BX
    78	CMPL	DX, $0xffff	// all 16 bits set <=> all 64 bytes matched
    79	JEQ	hugeloop
    80	XORQ	AX, AX	// return 0
    81	RET
    82#endif
    83
    84	// 64 bytes at a time using ymm registers
    85	PCALIGN $16
    86hugeloop_avx2:
    87	CMPQ	BX, $64
    88	JB	bigloop_avx2	// fewer than 64 bytes left: leave the AVX2 loop via VZEROUPPER
    89	VMOVDQU	(SI), Y0
    90	VMOVDQU	(DI), Y1
    91	VMOVDQU	32(SI), Y2
    92	VMOVDQU	32(DI), Y3
    93	VPCMPEQB	Y1, Y0, Y4	// Y4 = per-byte match mask for bytes 0-31
    94	VPCMPEQB	Y2, Y3, Y5	// Y5 = per-byte match mask for bytes 32-63
    95	VPAND	Y4, Y5, Y6	// lane stays 0xff only if it matched in both halves
    96	VPMOVMSKB Y6, DX	// DX = one bit per byte lane of the folded result
    97	ADDQ	$64, SI
    98	ADDQ	$64, DI
    99	SUBQ	$64, BX
   100	CMPL	DX, $0xffffffff	// all 32 bits set <=> all 64 bytes matched
   101	JEQ	hugeloop_avx2
   102	VZEROUPPER	// clear upper YMM halves before returning to non-AVX code
   103	XORQ	AX, AX	// return 0
   104	RET
   105
   106bigloop_avx2:
   107	VZEROUPPER	// same cleanup on the no-mismatch exit, then fall through into bigloop
   108
   109	// 8 bytes at a time using 64-bit register
   110	PCALIGN $16
   111bigloop:
   112	CMPQ	BX, $8
   113	JBE	leftover	// 8 or fewer bytes left: one final overlapping load decides
   114	MOVQ	(SI), CX
   115	MOVQ	(DI), DX
   116	ADDQ	$8, SI
   117	ADDQ	$8, DI
   118	SUBQ	$8, BX
   119	CMPQ	CX, DX
   120	JEQ	bigloop
   121	XORQ	AX, AX	// return 0
   122	RET
   123
   124	// remaining 0-8 bytes
      	// This point is only reached when the original count was at least 8, so
      	// the 8 bytes ending at SI+BX (and DI+BX) lie inside the buffers. The
      	// load may overlap bytes that were already compared equal, which cannot
      	// change the outcome.
   125leftover:
   126	MOVQ	-8(SI)(BX*1), CX
   127	MOVQ	-8(DI)(BX*1), DX
   128	CMPQ	CX, DX
   129	SETEQ	AX
   130	RET
   131
   132small:
   133	CMPQ	BX, $0
   134	JEQ	equal	// count == 0: ZF from this CMPQ is still set at equal, so SETEQ yields 1
   135
   136	LEAQ	0(BX*8), CX
   137	NEGQ	CX	// CX = -8*count; 64-bit shifts use only the low 6 bits, i.e. 64 - 8*count
   138
   139	CMPB	SI, $0xf8	// low address byte above 0xf8: an 8-byte load at SI would cross a 256-byte boundary
   140	JA	si_high
   141
   142	// load at SI won't cross a page boundary.
   143	MOVQ	(SI), SI	// wanted bytes are the low count bytes; the bytes above are discarded by SHLQ below
   144	JMP	si_finish
   145si_high:
   146	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   147	MOVQ	-8(SI)(BX*1), SI	// 8 bytes ending at the last wanted byte
   148	SHRQ	CX, SI	// shift the wanted bytes down into the low count bytes
   149si_finish:
   150
   151	// same for DI.
   152	CMPB	DI, $0xf8
   153	JA	di_high
   154	MOVQ	(DI), DI
   155	JMP	di_finish
   156di_high:
   157	MOVQ	-8(DI)(BX*1), DI
   158	SHRQ	CX, DI
   159di_finish:
   160
   161	SUBQ	SI, DI	// low count bytes of the difference are zero iff the wanted bytes match
   162	SHLQ	CX, DI	// shift out the bytes beyond count; ZF is set iff what remains is zero
   163equal:
   164	SETEQ	AX	// result = ZF, from SHLQ above or from the count == 0 compare
   165	RET

View as plain text