src/internal/bytealg/compare_riscv64.s

// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)

// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// for non-regabi, X14 points to the address at which to store the return value (-1/0/1)
// for regabi, the return value is returned in X10
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
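	// If a and b share the same base pointer, their common bytes are
	// identical, so the result depends only on the lengths.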
	BEQ	X10, X12, cmp_len

	MOV	X11, X5
	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	BEQZ	X5, cmp_len

	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32

	// Check one byte at a time until we reach 8 byte alignment.
	SUB	X7, X0, X7	// X7 = -(a_base & 7)
	ADD	$8, X7, X7	// X7 = 8 - (a_base & 7), bytes needed to reach alignment
	SUB	X7, X5, X5	// discount those bytes from the remaining length
align:
	SUB	$1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

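	// Main loop: compare 32 bytes per iteration as four 8-byte
	// doublewords; on a mismatch, cmp8a/cmp8b locate the differing byte.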
check32:
	// X6 contains $32
	BLT	X5, X6, compare16
compare32:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	SUB	$32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len

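	// Handle a single remaining 16-byte chunk the same way.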
check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	SUB	$16, X5
	BEQZ	X5, cmp_len

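	// Unaligned path: load eight bytes from each side individually,
	// which works for any alignment, and compare them pairwise.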
check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
compare8_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	SUB	$8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len

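	// Same byte-load technique for a remaining chunk of four or more bytes.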
check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	SUB	$4, X5
	BGE	X5, X6, compare4_unaligned

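	// Compare any remaining bytes one at a time.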
compare1:
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	SUB	$1, X5
	JMP	compare1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	MOV	$0xff, X19
cmp8_loop:
	// Walk a 0xff byte mask from the least significant byte upward.
	// Loads are little-endian, so this finds the lowest-addressed
	// (i.e. first) differing byte.
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop

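	// Each cmp1x label handles one differing register pair: it sets
	// X5 = 1 if a's byte is smaller and X6 = 1 if b's byte is smaller,
	// then cmp_ret materializes the result as X6 - X5.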
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

cmp_len:
	// All compared bytes matched; order by length instead.
	MOV	X11, X8
	MOV	X13, X9
cmp:
	SLTU	X9, X8, X5	// X5 = 1 if a < b
	SLTU	X8, X9, X6	// X6 = 1 if b < a
cmp_ret:
	SUB	X5, X6, X10	// X10 = X6 - X5, i.e. -1, 0 or +1
	RET