Text file src/runtime/race_amd64.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

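// For example, on Linux a call to
//	void __tsan_read(ThreadState *thr, void *addr, void *pc);
// expects thr in DI (RARG0), addr in SI (RARG1), and pc in DX (RARG2),
// while on Windows the same arguments travel in CX, DX, and R8.
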
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
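// The addr argument arrives in AX (the first integer argument register in
// Go's internal ABI), and because the frame is empty, (SP) still holds the
// caller's return address, which is the pc reported to tsan.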
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

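// Note on the ADDQ $1 above: tsan treats incoming pcs as return addresses
// and steps back before symbolizing, so a raw function-start pc would be
// attributed to the preceding function; start+1 lands in the right one.
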
// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
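// For a multi-word access (e.g. copying a whole struct), the compiler emits
// one range call covering the object instead of per-word raceread calls.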
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other
// arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// without this, 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

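// The check above, written as Go-style pseudocode (illustrative only):
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(fn, racectx, ...)	// fn is already in AX
//	}
//
// Only the heap arena and the data/bss segments have shadow memory;
// accesses to anything else are deliberately ignored.
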
// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	DX, BX		// save function entry context (for closures)
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves BX
	CALL	racecall<>(SB)
	MOVQ	BX, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

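// Together, racefuncenter/racefuncexit bracket every instrumented function,
// letting tsan maintain the shadow call stack it uses in race reports.
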
// Atomic operations for sync/atomic package.

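// Under -race these definitions override the native implementations in
// sync/atomic: the tsan runtime both performs the memory operation and
// records it, so e.g. atomic.AddInt32(&n, 1) keeps its usual semantics
// while becoming visible to the race detector.
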
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

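// __tsan_go_atomic32_fetch_add stores the old value into the result slot,
// but Go's Add returns the new value, so the delta is added back in:
// with old = 5 and add = 3, tsan writes 5 to ret and the ADDL makes it 8.
// AddInt64 below does the same with 64-bit operations.
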
TEXT	sync∕atomic·AddInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT|NOFRAME, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT|NOFRAME, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

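// Throughout this section the Uintptr variants (and LoadPointer) reuse the
// Int64 entry points because those types are all 8 bytes on amd64.
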
// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT|NOFRAME, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVBLZX	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, BX	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	BX, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

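// Stack layout inside racecallatomic<>: it is reached via CALL from a
// NOFRAME wrapper, so (SP) is the wrapper's pc, 8(SP) is the wrapper's
// caller, and 16(SP) is the start of the Go argument block, which the tsan
// Go atomics consume through a single pointer.
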
// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments
// to it. The arguments are never heap-object-preserving pointers, so we
// pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

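// A sketch of the Go-side declaration (see runtime/race.go; signature
// paraphrased here):
//
//	//go:noescape
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
//
// It is used, for example, to invoke __tsan_init during runtime startup.
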
// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	// Back to Go world, set special registers.
	// The g register (R14) is preserved in C.
	XORPS	X15, X15
	RET

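// X15 is the fixed zero register in Go's internal ABI and may be clobbered
// by C code, hence the XORPS on the way out. R14 (the g register) needs no
// such repair: it is callee-saved in both the System V and Windows ABIs.
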
// C->Go callback thunk that allows C code to call runtime·racesymbolize.
// The direct Go->C race call has only switched SP; finish the g->g0 switch
// by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for the command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0-0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it benefits
	// from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	MOVQ	m_g0(R13), R15
	CMPQ	R14, R15
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	MOVQ	R15, R14	// set g register
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	POP_REGS_HOST_TO_ABI0()
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret
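
// Flow of the slow path above: PUSH_REGS_HOST_TO_ABI0 saves everything the
// C caller expects preserved, the two PUSHQs build an ABI0 argument frame
// for racecallback(cmd, ctx), and the paired POPQs discard it before the
// registers are restored and control returns to C.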
