aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/sys_windows_386.s
diff options
context:
space:
mode:
author: Russ Cox <rsc@golang.org> 2017-02-03 19:26:13 -0500
committer: Russ Cox <rsc@golang.org> 2017-02-09 14:45:16 +0000
commit: e4371fb179ad69cbd057f2430120843948e09f2f (patch)
tree: 4e49771324f1f6bd35b2b962a9511fb9bb071455 /src/runtime/sys_windows_386.s
parent: 3a6842a0ecf66cf06ce4f0a5fcb9c09fbfdbecc1 (diff)
download: go-e4371fb179ad69cbd057f2430120843948e09f2f.tar.gz
go-e4371fb179ad69cbd057f2430120843948e09f2f.zip
time: optimize Now on darwin, windows
Fetch both monotonic and wall time together when possible.
Avoids skew and is cheaper.

Also shave a few ns off in conversion in package time.

Compared to current implementation (after monotonic changes):

name  old time/op  new time/op  delta
Now   19.6ns ± 1%   9.7ns ± 1%  -50.63%  (p=0.000 n=41+49)  darwin/amd64
Now   23.5ns ± 4%  10.6ns ± 5%  -54.61%  (p=0.000 n=30+28)  windows/amd64
Now   54.5ns ± 5%  29.8ns ± 9%  -45.40%  (p=0.000 n=27+29)  windows/386

More importantly, compared to Go 1.8:

name  old time/op  new time/op  delta
Now    9.5ns ± 1%   9.7ns ± 1%   +1.94%  (p=0.000 n=41+49)  darwin/amd64
Now   12.9ns ± 5%  10.6ns ± 5%  -17.73%  (p=0.000 n=30+28)  windows/amd64
Now   15.3ns ± 5%  29.8ns ± 9%  +94.36%  (p=0.000 n=30+29)  windows/386

This brings time.Now back in line with Go 1.8 on darwin/amd64 and windows/amd64.

It's not obvious why windows/386 is still noticeably worse than Go 1.8,
but it's better than before this CL. The windows/386 speed is not too
important; the changes just keep the two architectures similar.

Change-Id: If69b94970c8a1a57910a371ee91e0d4e82e46c5d
Reviewed-on: https://go-review.googlesource.com/36428
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/runtime/sys_windows_386.s')
-rw-r--r-- src/runtime/sys_windows_386.s 104
1 files changed, 96 insertions, 8 deletions
diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s
index 42583dd106..9d53fbf319 100644
--- a/src/runtime/sys_windows_386.s
+++ b/src/runtime/sys_windows_386.s
@@ -152,7 +152,7 @@ done:
// RET 4 (return and pop 4 bytes parameters)
BYTE $0xC2; WORD $4
RET // unreached; make assembler happy
-
+
TEXT runtime·exceptiontramp(SB),NOSPLIT,$0
MOVL $runtime·exceptionhandler(SB), AX
JMP runtime·sigtramp(SB)
@@ -432,15 +432,103 @@ TEXT runtime·switchtothread(SB),NOSPLIT,$0
MOVL BP, SP
RET
-// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$8-12
- CALL runtime·unixnano(SB)
- MOVL 0(SP), AX
- MOVL 4(SP), DX
+// See http://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure/
+// _INTERRUPT_TIME and _SYSTEM_TIME are fixed addresses of three-word time values (lo at +0, hi1 at +4, hi2 at +8). Must read hi1, then lo, then hi2. The snapshot is valid if hi1 == hi2; otherwise the readers below retry.
+#define _INTERRUPT_TIME 0x7ffe0008
+#define _SYSTEM_TIME 0x7ffe0014
+#define time_lo 0
+#define time_hi1 4
+#define time_hi2 8
+
+TEXT runtime·nanotime(SB),NOSPLIT,$0-8 // no arguments, no locals; 64-bit result at ret+0(FP) (presumably func nanotime() int64)
+loop:
+ MOVL (_INTERRUPT_TIME+time_hi1), AX // read order hi1, lo, hi2 is required for a consistent snapshot
+ MOVL (_INTERRUPT_TIME+time_lo), CX
+ MOVL (_INTERRUPT_TIME+time_hi2), DI
+ CMPL AX, DI
+ JNE loop // hi1 != hi2: the value changed while we were reading it, retry
+
+ // wintime = DI:CX, multiply by 100 (presumably 100ns ticks -> nanoseconds, since startNano is subtracted below)
+ MOVL $100, AX
+ MULL CX // DX:AX = lo * 100 (full 64-bit product)
+ IMULL $100, DI // DI = low 32 bits of hi * 100
+ ADDL DI, DX // fold hi*100 into the high word of the product
+ // wintime*100 = DX:AX, subtract startNano and return
+ SUBL runtime·startNano+0(SB), AX // low word; sets the borrow consumed by SBBL
+ SBBL runtime·startNano+4(SB), DX // high word minus borrow
+ MOVL AX, ret+0(FP)
+ MOVL DX, ret+4(FP)
+ RET
+TEXT time·now(SB),NOSPLIT,$0-20 // func now() (sec int64, nsec int32, mono int64): results at sec+0, nsec+8, mono+12(FP)
+loop:
+ MOVL (_INTERRUPT_TIME+time_hi1), AX // read order hi1, lo, hi2 is required for a consistent snapshot
+ MOVL (_INTERRUPT_TIME+time_lo), CX
+ MOVL (_INTERRUPT_TIME+time_hi2), DI
+ CMPL AX, DI
+ JNE loop // hi1 != hi2: torn read, retry
+
+ // w = DI:CX (interrupt time snapshot, hi:lo)
+ // multiply by 100
+ MOVL $100, AX
+ MULL CX // DX:AX = lo * 100 (full 64-bit product)
+ IMULL $100, DI // DI = low 32 bits of hi * 100
+ ADDL DI, DX // fold hi*100 into the high word of the product
+ // w*100 = DX:AX
+ // subtract startNano and save for return
+ SUBL runtime·startNano+0(SB), AX // low word; sets the borrow consumed by SBBL
+ SBBL runtime·startNano+4(SB), DX // high word minus borrow
+ MOVL AX, mono+12(FP)
+ MOVL DX, mono+16(FP)
+
+wall:
+ MOVL (_SYSTEM_TIME+time_hi1), CX // same hi1, lo, hi2 protocol, now for the wall clock
+ MOVL (_SYSTEM_TIME+time_lo), AX
+ MOVL (_SYSTEM_TIME+time_hi2), DX
+ CMPL CX, DX
+ JNE wall // hi1 != hi2: torn read, retry
+
+ // w = DX:AX
+ // convert to Unix epoch (but still 100ns units); delta is 11644473600 seconds expressed in 100ns units
+ #define delta 116444736000000000
+ SUBL $(delta & 0xFFFFFFFF), AX // low word; sets the borrow consumed by SBBL
+ SBBL $(delta >> 32), DX // high word minus borrow
+
+ // nano/100 = DX:AX
+ // split into two decimal halves by div 1e9.
+ // (decimal point is two spots over from correct place,
+ // but we avoid overflow in the high word.)
MOVL $1000000000, CX
DIVL CX // unsigned DX:AX / 1e9: AX = quotient, DX = remainder
+ MOVL AX, DI // DI = quotient
+ MOVL DX, SI // SI = remainder, kept because DX is clobbered by the MULL below
+
+ // DI = nano/100/1e9 = nano/1e11 = sec/100, DX = SI = nano/100%1e9
+ // split DX into seconds and nanoseconds by div 1e7 magic multiply.
+ MOVL DX, AX
+ MOVL $1801439851, CX // ceil(2^54 / 1e7)
+ MULL CX // DX = high 32 bits of the product, i.e. product >> 32
+ SHRL $22, DX // 32 + 22 = 54 bit shift in total: DX = (nano/100%1e9) / 1e7
+ MOVL DX, BX // BX = quotient, saved before DX is reused
+ IMULL $10000000, DX // DX = quotient * 1e7
+ MOVL SI, CX
+ SUBL DX, CX // CX = remainder of the division by 1e7
+
+ // DI = sec/100 (still)
+ // BX = (nano/100%1e9)/1e7 = (nano/1e9)%100 = sec%100
+ // CX = (nano/100%1e9)%1e7 = (nano%1e9)/100 = nsec/100
+ // store nsec for return
+ IMULL $100, CX // scale 100ns units back up to nanoseconds
+ MOVL CX, nsec+8(FP)
+
+ // DI = sec/100 (still)
+ // BX = sec%100
+ // construct DX:AX = 64-bit sec and store for return
+ MOVL $0, DX // NOTE(review): MULL below overwrites DX, so this zeroing looks redundant
+ MOVL $100, AX
+ MULL DI // DX:AX = (sec/100) * 100
+ ADDL BX, AX // add sec%100 to the low word
+ ADCL $0, DX // propagate the carry into the high word
MOVL AX, sec+0(FP)
- MOVL $0, sec+4(FP)
- MOVL DX, nsec+8(FP)
+ MOVL DX, sec+4(FP)
RET