From e3b4b7baad555f74b6fbc0ddc00d46ed0ac03a0a Mon Sep 17 00:00:00 2001 From: Jordan Rhee Date: Tue, 18 Dec 2018 12:54:23 -0800 Subject: runtime: use QPC for nanotime and time.now on windows/arm The previous implementation of nanotime and time.now used a time source that was updated on the system clock tick, which has a maximum resolution of about 1ms. On 386 and amd64, this time source maps to the system performance counter, so has much higher resolution. On ARM, use QueryPerformanceCounter() to get a high resolution timestamp. Updates #26148 Change-Id: I1abc99baf927a95b472ac05020a7788626c71d08 Reviewed-on: https://go-review.googlesource.com/c/154758 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/runtime/os_windows.go | 13 +++-- src/runtime/sys_windows_arm.s | 109 ++---------------------------------------- 2 files changed, 12 insertions(+), 110 deletions(-) diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go index 9b34589874..20fe01c403 100644 --- a/src/runtime/os_windows.go +++ b/src/runtime/os_windows.go @@ -198,9 +198,12 @@ func loadOptionalSyscalls() { } _NtWaitForSingleObject = windowsFindfunc(n32, []byte("NtWaitForSingleObject\000")) - if windowsFindfunc(n32, []byte("wine_get_version\000")) != nil { - // running on Wine - initWine(k32) + underWine := windowsFindfunc(n32, []byte("wine_get_version\000")) != nil + if underWine || GOARCH == "arm" { + initQPC(k32) + } + if underWine { + initWine() } } @@ -357,7 +360,7 @@ func nowQPC() (sec int64, nsec int32, mono int64) { return } -func initWine(k32 uintptr) { +func initQPC(k32 uintptr) { _GetSystemTimeAsFileTime = windowsFindfunc(k32, []byte("GetSystemTimeAsFileTime\000")) if _GetSystemTimeAsFileTime == nil { throw("could not find GetSystemTimeAsFileTime() syscall") @@ -394,7 +397,9 @@ func initWine(k32 uintptr) { // We have to do it this way (or similar), since multiplying QPC counter by 100 millions overflows // int64 and resulted time will always be invalid. qpcMultiplier = int64(timediv(1000000000, qpcFrequency, nil)) +} +func initWine() { useQPCTime = 1 } diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s index 60be74b95c..514dc5223e 100644 --- a/src/runtime/sys_windows_arm.s +++ b/src/runtime/sys_windows_arm.s @@ -487,115 +487,12 @@ TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0 MOVW R0, (R0) RET -// See http://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure/ -// Must read hi1, then lo, then hi2. The snapshot is valid if hi1 == hi2. -#define _INTERRUPT_TIME 0x7ffe0008 -#define _SYSTEM_TIME 0x7ffe0014 -#define time_lo 0 -#define time_hi1 4 -#define time_hi2 8 - -TEXT runtime·nanotime(SB),NOSPLIT,$0-8 - MOVW $0, R0 - MOVB runtime·useQPCTime(SB), R0 - CMP $0, R0 - BNE useQPC - MOVW $_INTERRUPT_TIME, R3 -loop: - MOVW time_hi1(R3), R1 - MOVW time_lo(R3), R0 - MOVW time_hi2(R3), R2 - CMP R1, R2 - BNE loop - - // wintime = R1:R0, multiply by 100 - MOVW $100, R2 - MULLU R0, R2, (R4, R3) // R4:R3 = R1:R0 * R2 - MULA R1, R2, R4, R4 - - // wintime*100 = R4:R3 - MOVW R3, ret_lo+0(FP) - MOVW R4, ret_hi+4(FP) - RET -useQPC: +TEXT runtime·nanotime(SB),NOSPLIT|NOFRAME,$0-8 B runtime·nanotimeQPC(SB) // tail call RET -TEXT time·now(SB),NOSPLIT,$0-20 - MOVW $0, R0 - MOVB runtime·useQPCTime(SB), R0 - CMP $0, R0 - BNE useQPC - MOVW $_INTERRUPT_TIME, R3 -loop: - MOVW time_hi1(R3), R1 - MOVW time_lo(R3), R0 - MOVW time_hi2(R3), R2 - CMP R1, R2 - BNE loop - - // wintime = R1:R0, multiply by 100 - MOVW $100, R2 - MULLU R0, R2, (R4, R3) // R4:R3 = R1:R0 * R2 - MULA R1, R2, R4, R4 - - // wintime*100 = R4:R3 - MOVW R3, mono+12(FP) - MOVW R4, mono+16(FP) - - MOVW $_SYSTEM_TIME, R3 -wall: - MOVW time_hi1(R3), R1 - MOVW time_lo(R3), R0 - MOVW time_hi2(R3), R2 - CMP R1, R2 - BNE wall - - // w = R1:R0 in 100ns untis - // convert to Unix epoch (but still 100ns units) - #define delta 116444736000000000 - SUB.S $(delta & 0xFFFFFFFF), R0 - SBC $(delta >> 32), R1 - - // Convert to nSec - MOVW $100, R2 - MULLU R0, R2, (R4, R3) // R4:R3 = R1:R0 * R2 - MULA R1, R2, R4, R4 - // w = R2:R1 in nSec - MOVW R3, R1 // R4:R3 -> R2:R1 - MOVW R4, R2 - - // multiply nanoseconds by reciprocal of 10**9 (scaled by 2**61) - // to get seconds (96 bit scaled result) - MOVW $0x89705f41, R3 // 2**61 * 10**-9 - MULLU R1,R3,(R6,R5) // R7:R6:R5 = R2:R1 * R3 - MOVW $0,R7 - MULALU R2,R3,(R7,R6) - - // unscale by discarding low 32 bits, shifting the rest by 29 - MOVW R6>>29,R6 // R7:R6 = (R7:R6:R5 >> 61) - ORR R7<<3,R6 - MOVW R7>>29,R7 - - // subtract (10**9 * sec) from nsec to get nanosecond remainder - MOVW $1000000000, R5 // 10**9 - MULLU R6,R5,(R9,R8) // R9:R8 = R7:R6 * R5 - MULA R7,R5,R9,R9 - SUB.S R8,R1 // R2:R1 -= R9:R8 - SBC R9,R2 - - // because reciprocal was a truncated repeating fraction, quotient - // may be slightly too small -- adjust to make remainder < 10**9 - CMP R5,R1 // if remainder > 10**9 - SUB.HS R5,R1 // remainder -= 10**9 - ADD.HS $1,R6 // sec += 1 - - MOVW R6,sec_lo+0(FP) - MOVW R7,sec_hi+4(FP) - MOVW R1,nsec+8(FP) - RET -useQPC: - B runtime·nanotimeQPC(SB) // tail call +TEXT time·now(SB),NOSPLIT|NOFRAME,$0-20 + B runtime·nowQPC(SB) // tail call RET // save_g saves the g register (R10) into thread local memory -- cgit v1.2.3-54-g00ecf