memmove_arm.s

Documentation: runtime

     1// Inferno's libkern/memmove-arm.s
     2// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-arm.s
     3//
     4//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
     5//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6//         Portions Copyright 2009 The Go Authors. All rights reserved.
     7//
     8// Permission is hereby granted, free of charge, to any person obtaining a copy
     9// of this software and associated documentation files (the "Software"), to deal
    10// in the Software without restriction, including without limitation the rights
    11// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12// copies of the Software, and to permit persons to whom the Software is
    13// furnished to do so, subject to the following conditions:
    14//
    15// The above copyright notice and this permission notice shall be included in
    16// all copies or substantial portions of the Software.
    17//
    18// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24// THE SOFTWARE.
    25
    26#include "textflag.h"
    27
    28// TE or TS are spilled to the stack during bulk register moves (whichever one lies inside the MOVM register list).
    29#define TS	R0					/* destination start pointer, loaded from to+0(FP) */
    30#define TE	R8					/* destination end pointer, computed as TS+N */
    31
    32// Warning: the linker will use R11 to synthesize certain instructions. Please
    33// take care and double check with objdump.
    34#define FROM	R11				/* source pointer, loaded from from+4(FP) */
    35#define N	R12					/* byte count; only read before TMP is first written */
    36#define TMP	R12				/* scratch; same register as N, but N and TMP live ranges don't overlap */
    37#define TMP1	R5				/* word scratch in the aligned word loops; same register as RSHIFT */
    38// RSHIFT, LSHIFT and OFFSET are only used by the unaligned-source paths (_bunaligned, _funaligned).
    39#define RSHIFT	R5
    40#define LSHIFT	R6
    41#define OFFSET	R7
    42// BRn = word read, BWn = word written in the backward unaligned loop. BWn shares a register with BR(n+1).
    43#define BR0	R0					/* shared with TS */
    44#define BW0	R1
    45#define BR1	R1
    46#define BW1	R2
    47#define BR2	R2
    48#define BW2	R3
    49#define BR3	R3
    50#define BW3	R4
    51// FRn = word read, FWn = word written in the forward unaligned loop. FW(n+1) shares a register with FRn.
    52#define FW0	R1
    53#define FR0	R2
    54#define FW1	R2
    55#define FR1	R3
    56#define FW2	R3
    57#define FR2	R4
    58#define FW3	R4
    59#define FR3	R8					/* shared with TE */
    60
    61// See memmove Go doc for important implementation constraints.
    62// Copies n bytes from 'from' to 'to'. Copies low-to-high when to <= from and high-to-low otherwise, presumably so that overlapping ranges are handled correctly.
    63// func memmove(to, from unsafe.Pointer, n uintptr)
    64TEXT runtime·memmove(SB), NOSPLIT, $4-12	// 4-byte frame = the single spill slot (savedts/savedte); 12 bytes of arguments
    65_memmove:
    66	MOVW	to+0(FP), TS
    67	MOVW	from+4(FP), FROM
    68	MOVW	n+8(FP), N
    69
    70	ADD	N, TS, TE	/* to end pointer */
    71
    72	CMP	FROM, TS
    73	BLS	_forward	/* to <= from (unsigned): copy from low to high addresses */
    74// Backward copy (to > from): FROM and TE start one past the end of their buffers and walk down towards TS.
    75_back:
    76	ADD	N, FROM		/* from end pointer */
    77	CMP	$4, N		/* need at least 4 bytes to copy */
    78	BLT	_b1tail
    79
    80_b4align:				/* align destination on 4 */
    81	AND.S	$3, TE, TMP	/* overwrites N (same register); N is not read again on this path */
    82	BEQ	_b4aligned
    83
    84	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
    85	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
    86	B	_b4align
    87
    88_b4aligned:				/* is source now aligned? */
    89	AND.S	$3, FROM, TMP	/* TMP = source misalignment (0..3); _bunaligned reads it */
    90	BNE	_bunaligned
    91// Source and destination end pointers are both word aligned from here.
    92	ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
    93	MOVW	TS, savedts-4(SP)	/* TS is R0, which is in the MOVM register list below */
    94_b32loop:
    95	CMP	TMP, TE
    96	BLS	_b4tail		/* TE <= TS+31: fewer than 32 bytes left */
    97
    98	MOVM.DB.W (FROM), [R0-R7]	/* load 8 words below FROM, with write-back */
    99	MOVM.DB.W [R0-R7], (TE)	/* store them below TE, with write-back */
   100	B	_b32loop
   101
   102_b4tail:				/* do remaining words if possible */
   103	MOVW	savedts-4(SP), TS	/* R0 was clobbered by the block loads */
   104	ADD	$3, TS, TMP
   105_b4loop:
   106	CMP	TMP, TE
   107	BLS	_b1tail		/* TE <= TS+3: fewer than 4 bytes left */
   108
   109	MOVW.W	-4(FROM), TMP1	/* pre-indexed */
   110	MOVW.W	TMP1, -4(TE)	/* pre-indexed */
   111	B	_b4loop
   112
   113_b1tail:				/* remaining bytes */
   114	CMP	TE, TS
   115	BEQ	_return		/* TE == TS: everything has been copied */
   116
   117	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
   118	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
   119	B	_b1tail
   120// Forward copy (to <= from): FROM and TS walk up; TE marks the end of the destination.
   121_forward:
   122	CMP	$4, N		/* need at least 4 bytes to copy */
   123	BLT	_f1tail
   124
   125_f4align:				/* align destination on 4 */
   126	AND.S	$3, TS, TMP	/* overwrites N (same register); N is not read again on this path */
   127	BEQ	_f4aligned
   128
   129	MOVBU.P	1(FROM), TMP	/* implicit write back */
   130	MOVBU.P	TMP, 1(TS)	/* implicit write back */
   131	B	_f4align
   132
   133_f4aligned:				/* is source now aligned? */
   134	AND.S	$3, FROM, TMP	/* TMP = source misalignment (0..3); _funaligned reads it */
   135	BNE	_funaligned
   136// Source and destination pointers are both word aligned from here.
   137	SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
   138	MOVW	TE, savedte-4(SP)	/* TE is R8, which is in the MOVM register list below; same stack slot as savedts */
   139_f32loop:
   140	CMP	TMP, TS
   141	BHS	_f4tail		/* TS >= TE-31: fewer than 32 bytes left */
   142
   143	MOVM.IA.W (FROM), [R1-R8]	/* load 8 words at FROM, with write-back */
   144	MOVM.IA.W [R1-R8], (TS)	/* store them at TS, with write-back */
   145	B	_f32loop
   146
   147_f4tail:
   148	MOVW	savedte-4(SP), TE	/* R8 was clobbered by the block loads */
   149	SUB	$3, TE, TMP	/* do remaining words if possible */
   150_f4loop:
   151	CMP	TMP, TS
   152	BHS	_f1tail		/* TS >= TE-3: fewer than 4 bytes left */
   153
   154	MOVW.P	4(FROM), TMP1	/* implicit write back */
   155	MOVW.P	TMP1, 4(TS)	/* implicit write back */
   156	B	_f4loop
   157
   158_f1tail:
   159	CMP	TS, TE
   160	BEQ	_return		/* TS == TE: everything has been copied */
   161
   162	MOVBU.P	1(FROM), TMP	/* implicit write back */
   163	MOVBU.P	TMP, 1(TS)	/* implicit write back */
   164	B	_f1tail
   165// Common exit for both copy directions.
   166_return:
   167	MOVW	to+0(FP), R0	/* reload 'to' into R0; the Go signature declares no result, so presumably kept for callers that expect the destination in R0 - TODO confirm */
   168	RET
   169// Backward copy, destination word aligned, source not. On entry TMP = FROM & 3, which is 1, 2 or 3.
   170_bunaligned:
   171	CMP	$2, TMP		/* is TMP < 2 ? */
   172// Exactly one of the three conditional groups below executes: LT for TMP == 1, EQ for TMP == 2, GT for TMP == 3.
   173	MOVW.LT	$8, RSHIFT		/* (R(n)<<24)|(R(n-1)>>8) */
   174	MOVW.LT	$24, LSHIFT
   175	MOVW.LT	$1, OFFSET
   176
   177	MOVW.EQ	$16, RSHIFT		/* (R(n)<<16)|(R(n-1)>>16) */
   178	MOVW.EQ	$16, LSHIFT
   179	MOVW.EQ	$2, OFFSET
   180
   181	MOVW.GT	$24, RSHIFT		/* (R(n)<<8)|(R(n-1)>>24) */
   182	MOVW.GT	$8, LSHIFT
   183	MOVW.GT	$3, OFFSET
   184// In every case OFFSET == FROM & 3, the number of bytes the BIC below removes from FROM.
   185	ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
   186	CMP	TMP, TE
   187	BLS	_b1tail		/* TE <= TS+16: too short for the block loop; FROM is still the true byte pointer */
   188
   189	BIC	$3, FROM		/* align source */
   190	MOVW	TS, savedts-4(SP)	/* BR0 is R0, i.e. TS */
   191	MOVW	(FROM), BR0	/* prime first block register */
   192// Each pass loads 4 aligned source words and merges neighbouring words with LSHIFT/RSHIFT into 4 destination words. BR0 carries over into the next pass.
   193_bu16loop:
   194	CMP	TMP, TE
   195	BLS	_bu1tail	/* TE <= TS+16: leave the rest to the byte loop */
   196// BWn shares a register with BR(n+1), so the outputs have to be built in the order BW3, BW2, BW1, BW0.
   197	MOVW	BR0<<LSHIFT, BW3	/* uses BR0 from the previous pass (or the priming load) before it is reloaded */
   198	MOVM.DB.W (FROM), [BR0-BR3]
   199	ORR	BR3>>RSHIFT, BW3
   200
   201	MOVW	BR3<<LSHIFT, BW2
   202	ORR	BR2>>RSHIFT, BW2
   203
   204	MOVW	BR2<<LSHIFT, BW1
   205	ORR	BR1>>RSHIFT, BW1
   206
   207	MOVW	BR1<<LSHIFT, BW0
   208	ORR	BR0>>RSHIFT, BW0
   209
   210	MOVM.DB.W [BW0-BW3], (TE)
   211	B	_bu16loop
   212
   213_bu1tail:
   214	MOVW	savedts-4(SP), TS	/* R0 was used as BR0 */
   215	ADD	OFFSET, FROM	/* undo the BIC so FROM is the true byte pointer again */
   216	B	_b1tail
   217// Forward copy, destination word aligned, source not. On entry TMP = FROM & 3, which is 1, 2 or 3.
   218_funaligned:
   219	CMP	$2, TMP
   220// Exactly one of the three conditional groups below executes: LT for TMP == 1, EQ for TMP == 2, GT for TMP == 3.
   221	MOVW.LT	$8, RSHIFT		/* (R(n+1)<<24)|(R(n)>>8) */
   222	MOVW.LT	$24, LSHIFT
   223	MOVW.LT	$3, OFFSET
   224
   225	MOVW.EQ	$16, RSHIFT		/* (R(n+1)<<16)|(R(n)>>16) */
   226	MOVW.EQ	$16, LSHIFT
   227	MOVW.EQ	$2, OFFSET
   228
   229	MOVW.GT	$24, RSHIFT		/* (R(n+1)<<8)|(R(n)>>24) */
   230	MOVW.GT	$8, LSHIFT
   231	MOVW.GT	$1, OFFSET
   232// In every case OFFSET == 4 - (FROM & 3): the priming load below leaves FROM one whole word past the aligned-down address.
   233	SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
   234	CMP	TMP, TS
   235	BHS	_f1tail		/* TS >= TE-16: too short for the block loop; FROM is still the true byte pointer */
   236
   237	BIC	$3, FROM		/* align source */
   238	MOVW	TE, savedte-4(SP)	/* FR3 is R8, i.e. TE */
   239	MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */
   240// Each pass loads 4 aligned source words and merges neighbouring words with RSHIFT/LSHIFT into 4 destination words. FR3 carries over into the next pass.
   241_fu16loop:
   242	CMP	TMP, TS
   243	BHS	_fu1tail	/* TS >= TE-16: leave the rest to the byte loop */
   244// FW(n+1) shares a register with FRn, so the outputs have to be built in the order FW0, FW1, FW2, FW3.
   245	MOVW	FR3>>RSHIFT, FW0	/* uses FR3 from the previous pass (or the priming load) before it is reloaded */
   246	MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]
   247	ORR	FR0<<LSHIFT, FW0
   248
   249	MOVW	FR0>>RSHIFT, FW1
   250	ORR	FR1<<LSHIFT, FW1
   251
   252	MOVW	FR1>>RSHIFT, FW2
   253	ORR	FR2<<LSHIFT, FW2
   254
   255	MOVW	FR2>>RSHIFT, FW3
   256	ORR	FR3<<LSHIFT, FW3
   257
   258	MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)
   259	B	_fu16loop
   260
   261_fu1tail:
   262	MOVW	savedte-4(SP), TE	/* R8 was used as FR3 */
   263	SUB	OFFSET, FROM	/* step back to the true byte pointer (aligned address + original misalignment) */
   264	B	_f1tail
View as plain text