aboutsummaryrefslogtreecommitdiff
path: root/src/math/cbrt_s390x.s
blob: 87bba531b8cb8d9c38edbf650cf74d599e2c4c0e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// Minimax polynomial coefficients and other constants
//
// Nine float64 values (72 bytes, read-only). cbrtAsm keeps the address of
// this block in R9 and reads the entries by byte offset:
//   0, 8, 16, 24, 32, 40  loaded with FMOVD during the final evaluation
//                         (the polynomial coefficients)
//   48                    1.000244140625 (= 1 + 2^-12), loaded into F5
//   56                    one third, loaded into F1
//   64                    79228162514264337593543950336 (= 2^96), the
//                         multiplier applied by the raw-encoded mdb on the
//                         path taken for non-zero inputs whose exponent
//                         field is zero
DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00
DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00
DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00
DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00
DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00
DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00
DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625
DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00
DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.
GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72

// Index tables
//
// cbrttab32069: 80 single-byte entries, packed eight per DATA directive
// (values are written without leading zeros). cbrtAsm reads one byte from
// it with the raw-encoded llc, using R2 as the byte index. At that point R2
// holds a 4-bit field extracted from the operand's high word (RISBGNZ $60,
// $63, $48) with three more bits OR-ed in from R3 by the raw-encoded rosbg.
// The byte read is added to R3 (and R4) to form the 16-bit seed.
// NOTE(review): the meaning of the individual byte values is not derivable
// from this file; treat them as generated data.
DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80

// cbrttab22068: 128 bytes holding 64 halfword (16-bit) entries, four per
// DATA directive (values are written without leading zeros). cbrtAsm adds
// one entry to R1 with the raw-encoded ah; the byte offset into the table
// is the even value produced in R3 by RISBGZ $57, $62, $45 from the
// operand's high word.
// NOTE(review): the meaning of the individual entries is not derivable from
// this file; treat them as generated data.
DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128

// cbrttab12067: 128 bytes holding 64 halfword (16-bit) entries, four per
// DATA directive. cbrtAsm loads one entry into R1 with the raw-encoded lh;
// the byte offset into the table is the even value produced in R3 by
// RISBGZ $57, $62, $39 from the operand's high word. The entries in the
// second half (offsets 64-127) are the first-half entries with the top bit
// (0x8000) set.
// NOTE(review): the second half presumably serves negative operands (the
// extracted field appears to start at the sign bit) -- confirm.
DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128

// Cbrt returns the cube root of the argument.
//
// Special cases are:
//      Cbrt(±0) = ±0
//      Cbrt(±Inf) = ±Inf
//      Cbrt(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
//
// Frame: $0-16 -- no local stack space; the float64 argument is read from
// x+0(FP) and the float64 result is written to ret+8(FP).
//
// Several instructions are emitted as raw bytes (a WORD followed by two
// BYTEs, or two BYTEs alone); the mnemonic each one encodes is given in the
// trailing comment on its first line.
//
// Outline:
//   1. Classify x by the high 32 bits of its bit pattern with the sign bit
//      cleared:
//        <= 0x000FFFFF      -> L2  (exponent field is zero)
//        <= 0x7FEFFFFF      -> L8  (exponent field is neither zero nor
//                                   all ones)
//        anything larger    -> L1  (return x unchanged)
//   2. L2 returns x unchanged if it compares equal to zero; otherwise it
//      multiplies a copy of x by the constant at offset 64 of cbrtrodataL9
//      (2^96) and takes the high word of that product for the table
//      lookups, with R4 = 0x200. L8 uses the high word of x itself, with
//      R4 = 0.
//   3. L4 builds a 16-bit seed from the three lookup tables plus R4, places
//      it in the top 16 bits of F2, and then runs the floating-point
//      refinement and polynomial evaluation that produce the result in F0.
//
// Registers: R9 = address of cbrtrodataL9; F0 = x on entry and the result on
// exit; R1-R7 and F1-F6/V0-V6 are used as scratch.

TEXT	·cbrtAsm(SB), NOSPLIT, $0-16
	FMOVD	x+0(FP), F0
	MOVD	$·cbrtrodataL9<>+0(SB), R9
	// R2 = raw bits of x; after the shift below, its high 32 bits.
	LGDR	F0, R2
	// R3 (low word) = 0x000FFFFF: largest high word with a zero exponent field.
	WORD	$0xC039000F	//iilf	%r3,1048575
	BYTE	$0xFF
	BYTE	$0xFF
	SRAD	$32, R2
	// R1 = low 31 bits of R2, i.e. the high word of x with the sign bit cleared.
	WORD	$0xB9170012	//llgtr	%r1,%r2
	MOVW	R1, R6
	MOVW	R3, R7
	CMPBLE	R6, R7, L2
	// R3 (low word) = 0x7FEFFFFF: largest high word whose exponent field is
	// not all ones.
	WORD	$0xC0397FEF	//iilf	%r3,2146435071
	BYTE	$0xFF
	BYTE	$0xFF
	MOVW	R3, R7
	CMPBLE	R6, R7, L8
L1:
	// Reached by fall-through when the exponent field is all ones, and from
	// L2 when x compares equal to zero: the argument is returned as-is.
	FMOVD	F0, ret+8(FP)
	RET
	// L3 is not the target of any branch in this function.
L3:
L2:
	// Exponent field is zero.
	LTDBR	F0, F0
	BEQ	L1
	// F2 = x * 2^96 (the encoded displacement 0x040 selects offset 64 of
	// cbrtrodataL9). F0 keeps the original x.
	FMOVD	F0, F2
	WORD	$0xED209040	//mdb	%f2,.L10-.L9(%r9)
	BYTE	$0x00
	BYTE	$0x1C
	// R4 is added to the 16-bit seed at L4.
	// NOTE(review): 0x200 in the top 16 bits of a float64 is +32 in the
	// exponent field, which looks like the compensation for the 2^96
	// pre-scaling -- confirm.
	MOVH	$0x200, R4
	// R2 = high word of the scaled value, used for the table lookups.
	LGDR	F2, R2
	SRAD	$32, R2
L4:
	// R3 = even byte offset into cbrttab12067, taken from a 6-bit field of R2.
	RISBGZ	$57, $62, $39, R2, R3
	MOVD	$·cbrttab12067<>+0(SB), R1
	WORD	$0x48131000	//lh	%r1,0(%r3,%r1)
	// R3 = even byte offset into cbrttab22068, taken from a different 6-bit
	// field of R2.
	RISBGZ	$57, $62, $45, R2, R3
	MOVD	$·cbrttab22068<>+0(SB), R5
	// R2 = a 4-bit field of the old R2, all other bits zero.
	RISBGNZ	$60, $63, $48, R2, R2
	// R1 = cbrttab12067 entry + cbrttab22068 entry; copied to R3.
	WORD	$0x4A135000	//ah	%r1,0(%r3,%r5)
	BYTE	$0x18	//lr	%r3,%r1
	BYTE	$0x31
	MOVD	$·cbrttab32069<>+0(SB), R1
	FMOVD	56(R9), F1
	FMOVD	48(R9), F5
	// OR three bits of R3 (rotated left by 4) into bit positions 57-59 of
	// R2, extending the 4-bit field into the byte index for cbrttab32069.
	WORD	$0xEC23393B	//rosbg	%r2,%r3,57,59,4
	BYTE	$0x04
	BYTE	$0x56
	WORD	$0xE3121000	//llc	%r1,0(%r2,%r1)
	BYTE	$0x00
	BYTE	$0x94
	// Seed = table sum (R3) + cbrttab32069 byte (R1) + R4. The two shifts
	// move its low 16 bits into the top 16 bits of the 64-bit pattern that
	// is then transferred to F2; all lower bits are zero.
	ADDW	R3, R1
	ADDW	R4, R1
	SLW	$16, R1, R1
	SLD	$32, R1, R1
	LDGR	R1, F2
	// Floating-point refinement of the seed, followed by evaluation of the
	// polynomial whose coefficients are loaded from offsets 40, 32, 24, 16,
	// 8 and 0 of cbrtrodataL9. Loads are interleaved with arithmetic, so
	// the instruction order and register reuse below are significant.
	// NOTE(review): the seed appears to approximate x^(-1/3), refined once
	// using the 1/3 constant in F1 and then combined with x (F0 = x * F2^2)
	// -- confirm against the operand-order rules of the s390x assembler.
	WFMDB	V2, V2, V4
	WFMDB	V4, V0, V6
	WFMSDB	V4, V6, V2, V4
	FMOVD	40(R9), F6
	FMSUB	F1, F4, F2
	FMOVD	32(R9), F4
	WFMDB	V2, V2, V3
	FMOVD	24(R9), F1
	FMUL	F3, F0
	FMOVD	16(R9), F3
	WFMADB	V2, V0, V5, V2
	FMOVD	8(R9), F5
	FMADD	F6, F2, F4
	WFMADB	V2, V1, V3, V1
	WFMDB	V2, V2, V6
	FMOVD	0(R9), F3
	WFMADB	V4, V6, V1, V4
	WFMADB	V2, V5, V3, V2
	FMADD	F4, F6, F2
	FMADD	F2, F0, F0
	FMOVD	F0, ret+8(FP)
	RET
L8:
	// Exponent field is neither zero nor all ones: no pre-scaling, so no
	// seed adjustment. R2 still holds the high word of x.
	MOVH	$0x0, R4
	BR	L4