...

Text file src/internal/bytealg/index_arm64.s

Documentation: internal/bytealg

     1// Copyright 2018 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8TEXT ·Index(SB),NOSPLIT,$0-56
     9	MOVD	$ret+48(FP), R9	// R9 = address of the result slot in the caller's frame
    10	MOVD	b_base+24(FP), R2	// R2 = needle pointer
    11	MOVD	b_len+32(FP), R3	// R3 = needle length
    12	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
    13	MOVD	a_len+8(FP), R1	// R1 = haystack length
    14	B	indexbody<>(SB)	// plain branch (no link): indexbody<> stores the result through R9
    15
    16TEXT ·IndexString(SB),NOSPLIT,$0-40
    17	MOVD	$ret+32(FP), R9	// R9 = address of the result slot in the caller's frame
    18	MOVD	b_base+16(FP), R2	// R2 = needle pointer
    19	MOVD	b_len+24(FP), R3	// R3 = needle length
    20	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
    21	MOVD	a_len+8(FP), R1	// R1 = haystack length
    22	B	indexbody<>(SB)	// plain branch (no link): indexbody<> stores the result through R9
    23
    24// input:
    25//   R0: haystack (address of its first byte)
    26//   R1: length of haystack
    27//   R2: needle (address of its first byte)
    28//   R3: length of needle (2 <= len <= 32; this routine does not re-check it)
    29//   R9: address to put result (offset of the first match, or -1 if none)
//
// Registers set up before the length dispatch:
//   R4:  address of the last haystack position where the needle still fits
//   R8:  haystack+1; the scan cursor R0 is post-incremented by the first load
//        of every loop, so R0-R8 at found is the offset of the matching position
//   R11: len(needle)-9, set only when len > 8; added to the already advanced
//        cursor it addresses the last 8 bytes of the current candidate
//
// Each length class keeps the needle in registers and compares it against the
// haystack one starting position at a time, in ascending order.
    30TEXT indexbody<>(SB),NOSPLIT,$0-56
    31	// main idea is to load 'sep' (the needle) into separate register(s) once,
    32	// to avoid reloading it again and again
    33	// for subsequent substring comparisons
    34	SUB	R3, R1, R4	// R4 = len(haystack) - len(needle)
    35	// R4 contains the start of last substring for comparison
    36	ADD	R0, R4, R4
    37	ADD	$1, R0, R8	// R8 = haystack + 1, subtracted from the cursor at found
    38
    39	CMP	$8, R3
    40	BHI	greater_8	// len > 8
    41	TBZ	$3, R3, len_2_7	// len <= 8 with bit 3 clear: len is 2..7
    42len_8:
    43	// R5 contains 8-byte of sep
    44	MOVD	(R2), R5
    45loop_8:
    46	// R6 contains substring for comparison
    47	CMP	R4, R0
    48	BHI	not_found	// cursor is past the last possible start
    49	MOVD.P	1(R0), R6	// load 8 bytes at the cursor, then cursor++
    50	CMP	R5, R6
    51	BNE	loop_8
    52	B	found
    53len_2_7:
	// lengths 2..7 are separated by testing bits 2, 1 and 0 of R3
    54	TBZ	$2, R3, len_2_3	// bit 2 clear: len is 2 or 3
    55	TBZ	$1, R3, len_4_5	// bit 1 clear: len is 4 or 5
    56	TBZ	$0, R3, len_6	// bit 0 clear: len is 6, otherwise 7
    57len_7:
    58	// R5 and R6 contain 7-byte of sep
    59	MOVWU	(R2), R5	// R5 = sep[0:4]
    60	// 1-byte overlap with R5
    61	MOVWU	3(R2), R6	// R6 = sep[3:7]
    62loop_7:
	// R3 (needle length) is not read again on this path and serves as scratch
    63	CMP	R4, R0
    64	BHI	not_found
    65	MOVWU.P	1(R0), R3	// first 4 bytes of the candidate, then cursor++
    66	CMP	R5, R3
    67	BNE	loop_7
    68	MOVWU	2(R0), R3	// cursor already advanced: this is candidate[3:7]
    69	CMP	R6, R3
    70	BNE	loop_7
    71	B	found
    72len_6:
    73	// R5 and R6 contain 6-byte of sep
    74	MOVWU	(R2), R5	// R5 = sep[0:4]
    75	MOVHU	4(R2), R6	// R6 = sep[4:6]
    76loop_6:
    77	CMP	R4, R0
    78	BHI	not_found
    79	MOVWU.P	1(R0), R3	// first 4 bytes of the candidate, then cursor++
    80	CMP	R5, R3
    81	BNE	loop_6
    82	MOVHU	3(R0), R3	// cursor already advanced: this is candidate[4:6]
    83	CMP	R6, R3
    84	BNE	loop_6
    85	B	found
    86len_4_5:
    87	TBZ	$0, R3, len_4	// bit 0 clear: len is 4, otherwise 5
    88len_5:
    89	// R5 and R7 contain 5-byte of sep
    90	MOVWU	(R2), R5	// R5 = sep[0:4]
    91	MOVBU	4(R2), R7	// R7 = sep[4]
    92loop_5:
    93	CMP	R4, R0
    94	BHI	not_found
    95	MOVWU.P	1(R0), R3	// first 4 bytes of the candidate, then cursor++
    96	CMP	R5, R3
    97	BNE	loop_5
    98	MOVBU	3(R0), R3	// cursor already advanced: this is candidate[4]
    99	CMP	R7, R3
   100	BNE	loop_5
   101	B	found
   102len_4:
   103	// R5 contains 4-byte of sep
   104	MOVWU	(R2), R5
   105loop_4:
   106	CMP	R4, R0
   107	BHI	not_found
   108	MOVWU.P	1(R0), R6	// load 4 bytes at the cursor, then cursor++
   109	CMP	R5, R6
   110	BNE	loop_4
   111	B	found
   112len_2_3:
   113	TBZ	$0, R3, len_2	// bit 0 clear: len is 2, otherwise 3
   114len_3:
   115	// R6 and R7 contain 3-byte of sep
   116	MOVHU	(R2), R6	// R6 = sep[0:2]
   117	MOVBU	2(R2), R7	// R7 = sep[2]
   118loop_3:
   119	CMP	R4, R0
   120	BHI	not_found
   121	MOVHU.P	1(R0), R3	// first 2 bytes of the candidate, then cursor++
   122	CMP	R6, R3
   123	BNE	loop_3
   124	MOVBU	1(R0), R3	// cursor already advanced: this is candidate[2]
   125	CMP	R7, R3
   126	BNE	loop_3
   127	B	found
   128len_2:
   129	// R5 contains 2-byte of sep
   130	MOVHU	(R2), R5
   131loop_2:
   132	CMP	R4, R0
   133	BHI	not_found
   134	MOVHU.P	1(R0), R6	// load 2 bytes at the cursor, then cursor++
   135	CMP	R5, R6
   136	BNE	loop_2	// on a match, fall through into found
   137found:
	// R0 is one byte past the matching position on every path that gets here;
	// R8 is haystack+1, so the difference is the offset of the match
   138	SUB	R8, R0, R0
   139	MOVD	R0, (R9)	// store the offset through the result pointer
   140	RET
   141not_found:
   142	MOVD	$-1, R0
   143	MOVD	R0, (R9)	// store -1 through the result pointer
   144	RET
   145greater_8:
   146	SUB	$9, R3, R11	// len(sep) - 9, offset of R0 for last 8 bytes
   147	CMP	$16, R3
   148	BHI	greater_16	// len > 16
   149len_9_16:
   150	MOVD.P	8(R2), R5	// R5 contains the first 8-byte of sep; R2 advances by 8
   151	SUB	$16, R3, R7	// len(sep) - 16, offset of R2 for last 8 bytes
   152	MOVD	(R2)(R7), R6	// R6 contains the last 8-byte of sep
   153loop_9_16:
   154	// search the first 8 bytes first
	// (R7 held only the needle offset above and is scratch from here on)
   155	CMP	R4, R0
   156	BHI	not_found
   157	MOVD.P	1(R0), R7	// first 8 bytes of the candidate, then cursor++
   158	CMP	R5, R7
   159	BNE	loop_9_16
   160	MOVD	(R0)(R11), R7	// last 8 bytes of the candidate (may overlap the first 8)
   161	CMP	R6, R7		// compare the last 8 bytes
   162	BNE	loop_9_16
   163	B	found
   164greater_16:
   165	CMP	$24, R3
   166	BHI	len_25_32	// len > 24
   167len_17_24:
   168	LDP.P	16(R2), (R5, R6)	// R5 and R6 contain the first 16-byte of sep; R2 advances by 16
   169	SUB	$24, R3, R10		// len(sep) - 24
   170	MOVD	(R2)(R10), R7		// R7 contains the last 8-byte of sep
   171loop_17_24:
   172	// search the first 16 bytes first
	// (R10 held only the needle offset above and is scratch from here on)
   173	CMP	R4, R0
   174	BHI	not_found
   175	MOVD.P	1(R0), R10	// candidate[0:8], then cursor++
   176	CMP	R5, R10
   177	BNE	loop_17_24
   178	MOVD	7(R0), R10	// cursor already advanced: this is candidate[8:16]
   179	CMP	R6, R10
   180	BNE	loop_17_24
   181	MOVD	(R0)(R11), R10	// last 8 bytes of the candidate
   182	CMP	R7, R10		// compare the last 8 bytes
   183	BNE	loop_17_24
   184	B	found
   185len_25_32:
   186	LDP.P	16(R2), (R5, R6)
   187	MOVD.P	8(R2), R7	// R5, R6 and R7 contain the first 24-byte of sep
   188	SUB	$32, R3, R12	// len(sep) - 32
   189	MOVD	(R2)(R12), R10	// R10 contains the last 8-byte of sep
   190loop_25_32:
   191	// search the first 24 bytes first
	// (R12 held only the needle offset above and is scratch from here on)
   192	CMP	R4, R0
   193	BHI	not_found
   194	MOVD.P	1(R0), R12	// candidate[0:8], then cursor++
   195	CMP	R5, R12
   196	BNE	loop_25_32
   197	MOVD	7(R0), R12	// cursor already advanced: this is candidate[8:16]
   198	CMP	R6, R12
   199	BNE	loop_25_32
   200	MOVD	15(R0), R12	// candidate[16:24]
   201	CMP	R7, R12
   202	BNE	loop_25_32
   203	MOVD	(R0)(R11), R12	// last 8 bytes of the candidate
   204	CMP	R10, R12	// compare the last 8 bytes
   205	BNE	loop_25_32
   206	B	found

View as plain text