...

Text file src/runtime/memmove_ppc64x.s

Documentation: runtime

     1// Copyright 2014 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build ppc64 || ppc64le
     6
     7#include "textflag.h"
     8
     9// See memmove Go doc for important implementation constraints.
    10
    11// func memmove(to, from unsafe.Pointer, n uintptr)
    12
    13// target address
    14#define TGT R3
    15// source address
    16#define SRC R4
    17// length to move
    18#define LEN R5
    19// number of doublewords still to move (initially LEN / 8)
    20#define DWORDS R6
    21// number of tail bytes (LEN mod 8, so always < 8)
    22#define BYTES R7
    23// const 16 used as index
    24#define IDX16 R8
    25// temp used for copies, etc.
    26#define TMP R9
    27// number of 32 byte chunks (used by the backward copy)
    28#define QWORDS R10
    29// IDX32/IDX48: const 32 and 48 used as indexes; OCTWORDS: number of 64 byte chunks (forward copy)
    30#define IDX32 R14
    31#define IDX48 R15
    32#define OCTWORDS R16
    33
    34TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24
    35	// R3 = TGT = to
    36	// R4 = SRC = from
    37	// R5 = LEN = n
    38	// Copies LEN bytes from SRC to TGT; the copy runs backward when the unsigned value TGT - SRC is below LEN.
    39	// Determine if there are doublewords to
    40	// copy so a more efficient move can be done
    41check:
    42#ifdef GOPPC64_power10
    43	CMP	LEN, $16	// power10 only: LEN <= 16 is moved with a single LXVL/STXVL pair
    44	BGT	mcopy
    45	SLD	$56, LEN, TMP	// byte count is placed in the high-order byte of TMP for LXVL/STXVL
    46	LXVL	SRC, TMP, V0
    47	STXVL	V0, TGT, TMP
    48	RET
    49#endif
    50mcopy:
    51	ANDCC	$7, LEN, BYTES	// R7: tail bytes to copy (LEN mod 8); result recorded in CR0
    52	SRD	$3, LEN, DWORDS	// R6: double words to copy
    53	MOVFL	CR0, CR3	// save CR0 from ANDCC in CR3; CR3[EQ] (tested at checkbytes) means no tail bytes
    54	CMP	DWORDS, $0, CR1	// CR1[EQ] set if no double words to copy
    55
    56	// Determine overlap by subtracting dest - src and comparing against the
    57	// length.  This catches the cases where src and dest are in different types
    58	// of storage such as stack and static to avoid doing backward move when not
    59	// necessary.
    60
    61	SUB	SRC, TGT, TMP	// dest - src
    62	CMPU	TMP, LEN, CR2	// unsigned (dest - src) < len?
    63	BC	12, 8, backward // BLT CR2 backward
    64
    65	// Copying forward if no overlap.
    66
    67	BC	12, 6, checkbytes	// BEQ CR1, checkbytes (no doublewords, only tail bytes)
    68	SRDCC	$3, DWORDS, OCTWORDS	// 64 byte chunks?
    69	MOVD	$16, IDX16
    70	BEQ	lt64gt8			// < 64 bytes
    71
    72	// Prepare for moves of 64 bytes at a time.
    73
    74forward64setup:
    75	DCBTST	(TGT)			// prepare data cache
    76	DCBT	(SRC)
    77	MOVD	OCTWORDS, CTR		// Number of 64 byte chunks
    78	MOVD	$32, IDX32
    79	MOVD	$48, IDX48
    80	PCALIGN	$16
    81
    82forward64:
    83	LXVD2X	(R0)(SRC), VS32		// load 64 bytes
    84	LXVD2X	(IDX16)(SRC), VS33
    85	LXVD2X	(IDX32)(SRC), VS34
    86	LXVD2X	(IDX48)(SRC), VS35
    87	ADD	$64, SRC
    88	STXVD2X	VS32, (R0)(TGT)		// store 64 bytes
    89	STXVD2X	VS33, (IDX16)(TGT)
    90	STXVD2X	VS34, (IDX32)(TGT)
    91	STXVD2X VS35, (IDX48)(TGT)
    92	ADD	$64,TGT			// bump up for next set
    93	BC	16, 0, forward64	// bdnz: decrement CTR and loop while 64 byte chunks remain
    94	ANDCC	$7, DWORDS		// remaining doublewords (overwrites CR0, which is why CR3 holds the tail-byte test)
    95	BEQ	checkbytes		// only bytes remain
    96
    97lt64gt8:
    98	CMP	DWORDS, $4	// 4 doublewords (32 bytes) or more left?
    99	BLT	lt32gt8
   100	LXVD2X	(R0)(SRC), VS32
   101	LXVD2X	(IDX16)(SRC), VS33
   102	ADD	$-4, DWORDS
   103	STXVD2X	VS32, (R0)(TGT)
   104	STXVD2X	VS33, (IDX16)(TGT)
   105	ADD	$32, SRC
   106	ADD	$32, TGT
   107
   108lt32gt8:
   109	// At this point DWORDS is 0..3 (fewer than 32 bytes of doublewords remain)
   110	// Move 16 bytes if possible
   111	CMP     DWORDS, $2
   112	BLT     lt16
   113	LXVD2X	(R0)(SRC), VS32
   114	ADD	$-2, DWORDS
   115	STXVD2X	VS32, (R0)(TGT)
   116	ADD     $16, SRC
   117	ADD     $16, TGT
   118
   119lt16:	// Move 8 bytes if possible
   120	CMP     DWORDS, $1
   121	BLT     checkbytes
   122#ifdef GOPPC64_power10
   123	ADD	$8, BYTES	// power10 only: last doubleword plus tail bytes (8..15 bytes) in one LXVL/STXVL
   124	SLD	$56, BYTES, TMP
   125	LXVL	SRC, TMP, V0
   126	STXVL	V0, TGT, TMP
   127	RET
   128#endif
   129
   130	MOVD    0(SRC), TMP
   131	ADD	$8, SRC
   132	MOVD    TMP, 0(TGT)
   133	ADD     $8, TGT
   134checkbytes:
   135	BC	12, 14, LR		// beqlr cr3: return if no tail bytes (CR3 saved from ANDCC $7, LEN)
   136#ifdef GOPPC64_power10
   137	SLD	$56, BYTES, TMP	// power10 only: move the 1..7 tail bytes with one LXVL/STXVL
   138	LXVL	SRC, TMP, V0
   139	STXVL	V0, TGT, TMP
   140	RET
   141#endif
   142lt8:	// Move word if possible
   143	CMP BYTES, $4
   144	BLT lt4
   145	MOVWZ 0(SRC), TMP
   146	ADD $-4, BYTES
   147	MOVW TMP, 0(TGT)
   148	ADD $4, SRC
   149	ADD $4, TGT
   150lt4:	// Move halfword if possible
   151	CMP BYTES, $2
   152	BLT lt2
   153	MOVHZ 0(SRC), TMP
   154	ADD $-2, BYTES
   155	MOVH TMP, 0(TGT)
   156	ADD $2, SRC
   157	ADD $2, TGT
   158lt2:	// Move last byte if 1 left
   159	CMP BYTES, $1
   160	BC 12, 0, LR	// bltlr, return if BYTES < 1 (nothing left)
   161	MOVBZ 0(SRC), TMP
   162	MOVBZ TMP, 0(TGT)
   163	RET
   164
   165backward:
   166	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
   167	// R3 and R4 are advanced to the end of the destination/source buffers
   168	// respectively and moved back as we copy.
   169
   170	ADD	LEN, SRC, SRC		// end of source
   171	ADD	TGT, LEN, TGT		// end of dest
   172
   173	BEQ	nobackwardtail		// CR0[EQ] is still the result of ANDCC $7, LEN: no tail bytes
   174
   175	MOVD	BYTES, CTR			// bytes to move
   176
   177backwardtailloop:
   178	MOVBZ 	-1(SRC), TMP		// load the byte just below SRC
   179	SUB	$1,SRC
   180	MOVBZ 	TMP, -1(TGT)
   181	SUB	$1,TGT
   182	BDNZ	backwardtailloop
   183
   184nobackwardtail:
   185	BC	4, 5, LR		// blelr cr1, return if DWORDS == 0
   186	SRDCC	$2,DWORDS,QWORDS	// Compute number of 32B blocks and compare to 0
   187	BNE	backward32setup		// If QWORDS != 0, start the 32B copy loop.
   188
   189backward24:
   190	// DWORDS is a value between 1-3.
   191	CMP	DWORDS, $2	// CR0 from this compare is tested by both conditional returns below
   192
   193	MOVD 	-8(SRC), TMP
   194	MOVD 	TMP, -8(TGT)
   195	BC	12, 0, LR		// bltlr, return if DWORDS == 1
   196
   197	MOVD 	-16(SRC), TMP
   198	MOVD 	TMP, -16(TGT)
   199	BC	12, 2, LR		// beqlr, return if DWORDS == 2
   200
   201	MOVD 	-24(SRC), TMP
   202	MOVD 	TMP, -24(TGT)
   203	RET
   204
   205backward32setup:
   206	ANDCC   $3,DWORDS		// Compute remaining DWORDS and compare to 0 (CR0 is tested after the loop)
   207	MOVD	QWORDS, CTR		// set up loop ctr
   208	MOVD	$16, IDX16		// 32 bytes at a time
   209	PCALIGN	$16
   210
   211backward32loop:
   212	SUB	$32, TGT
   213	SUB	$32, SRC
   214	LXVD2X	(R0)(SRC), VS32		// load 16x2 bytes
   215	LXVD2X	(IDX16)(SRC), VS33
   216	STXVD2X	VS32, (R0)(TGT)		// store 16x2 bytes
   217	STXVD2X	VS33, (IDX16)(TGT)
   218	BDNZ	backward32loop
   219	BC	12, 2, LR		// beqlr, return if DWORDS == 0
   220	BR	backward24	// 1-3 doublewords remain

View as plain text